In [1]:
!cp -r ../input/d/romanofrancesco/recsys-repo/RecSys_Course_AT_PoliMi-master/* ./

In [2]:
%config Completer.use_jedi = False
import pandas as pd
import numpy as np
import scipy.sparse as sps
import matplotlib.pyplot as pyplot
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [3]:
#load data_train, matrix of users interaction
URM_path = "../input/recommender-system-2021-challenge-polimi/data_train.csv"
URM_all_dataframe = pd.read_csv(filepath_or_buffer=URM_path, 
                                sep=",",
                                dtype={0:int, 1:int, 2:float},
                                header=0)
URM_all_dataframe.columns = ["UserID", "ItemID", "Interaction"]

In [4]:
URM_all_dataframe.head()


Unnamed: 0,UserID,ItemID,Interaction
0,0,53,1.0
1,0,209,1.0
2,0,223,1.0
3,0,249,1.0
4,0,435,1.0


In [5]:
userID_unique = URM_all_dataframe["UserID"].unique()
itemID_unique = URM_all_dataframe["ItemID"].unique()

n_users = len(userID_unique)
n_items = len(itemID_unique)
n_interactions = len(URM_all_dataframe)

print ("Number of items\t {}, Number of users\t {}".format(n_items, n_users))
print ("Max ID items\t {}, Max Id users\t {}\n".format(max(itemID_unique), max(userID_unique)))
print ("Average interactions per user {:.2f}".format(n_interactions/n_users))
print ("Average interactions per item {:.2f}\n".format(n_interactions/n_items))

print ("Sparsity {:.2f} %".format((1-float(n_interactions)/(n_items*n_users))*100))

Number of items	 18059, Number of users	 13650
Max ID items	 18058, Max Id users	 13649

Average interactions per user 387.23
Average interactions per item 292.69

Sparsity 97.86 %


In [6]:
URM_all = sps.coo_matrix((URM_all_dataframe["Interaction"].values, 
                          (URM_all_dataframe["UserID"].values, URM_all_dataframe["ItemID"].values)))
URM_all = URM_all.tocsr() # to obtain fast access to rows (users)
URM_all

<13650x18059 sparse matrix of type '<class 'numpy.float64'>'
	with 5285664 stored elements in Compressed Sparse Row format>

In [7]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

# split data into train and validation data 80/20
URM_train, URM_valid = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.80)



In [8]:
from Evaluation.Evaluator import EvaluatorHoldout

#create an evaluator object to evaluate validation set
#we will use it for hyperparameter tuning
evaluator_valid = EvaluatorHoldout(URM_valid, cutoff_list=[10])

EvaluatorHoldout: Ignoring 13646 ( 0.0%) Users that have less than 1 test interactions


In [9]:
from Recommenders.MatrixFactorization.IALSRecommender import IALSRecommender

#fit the IALS model tuned in previous experiments
mf_ials = IALSRecommender(URM_train)
mf_ials.fit(num_factors = 31,
            confidence_scaling= 'log',
            alpha = 0.0024941846820976015,
            epsilon = 3.449297756742473,
            reg = 5.61162089901928e-05,
            epochs = 40)

IALSRecommender: Epoch 1 of 40. Elapsed time 7.59 sec
IALSRecommender: Epoch 2 of 40. Elapsed time 15.14 sec
IALSRecommender: Epoch 3 of 40. Elapsed time 22.75 sec
IALSRecommender: Epoch 4 of 40. Elapsed time 31.61 sec
IALSRecommender: Epoch 5 of 40. Elapsed time 39.33 sec
IALSRecommender: Epoch 6 of 40. Elapsed time 46.95 sec
IALSRecommender: Epoch 7 of 40. Elapsed time 54.66 sec
IALSRecommender: Epoch 8 of 40. Elapsed time 1.06 min
IALSRecommender: Epoch 9 of 40. Elapsed time 1.18 min
IALSRecommender: Epoch 10 of 40. Elapsed time 1.31 min
IALSRecommender: Epoch 11 of 40. Elapsed time 1.44 min
IALSRecommender: Epoch 12 of 40. Elapsed time 1.58 min
IALSRecommender: Epoch 13 of 40. Elapsed time 1.71 min
IALSRecommender: Epoch 14 of 40. Elapsed time 1.83 min
IALSRecommender: Epoch 15 of 40. Elapsed time 1.96 min
IALSRecommender: Epoch 16 of 40. Elapsed time 2.11 min
IALSRecommender: Epoch 17 of 40. Elapsed time 2.23 min
IALSRecommender: Epoch 18 of 40. Elapsed time 2.36 min
IALSRecommend

In [10]:
#check evaluation on pure svd
evaluator_valid.evaluateRecommender(mf_ials)

EvaluatorHoldout: Processed 13646 (100.0%) in 15.36 sec. Users per second: 888


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
 cutoff                                                                      
 10      0.377327                 0.378693  0.065702  0.230622    0.231246   
 
              MRR      NDCG        F1  HIT_RATE ARHR_ALL_HITS  ...  \
 cutoff                                                        ...   
 10      0.631976  0.392387  0.111917  0.963066       1.18543  ...   
 
        COVERAGE_ITEM_CORRECT COVERAGE_USER COVERAGE_USER_CORRECT  \
 cutoff                                                             
 10                  0.039592      0.999707              0.962784   
 
        DIVERSITY_GINI SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL  \
 cutoff                                                             
 10           0.010426        7.930212                   0.993528   
 
        RATIO_DIVERSITY_GINI RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY  \
 cutoff                                                

In [None]:
#similarity_slimen = sps.load_npz("../input/slimen047/W_sparse.npz")
#similarity_slimen
#similarity_slimen_array = similarity_slimen['arr_0']

In [11]:
import numpy as np
import scipy.sparse as sps
from Recommenders.Recommender_utils import check_matrix
from sklearn.linear_model import ElasticNet
from Recommenders.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit
import time, sys
from tqdm import tqdm
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# os.environ["PYTHONWARNINGS"] = ('ignore::exceptions.ConvergenceWarning:sklearn.linear_model')
# os.environ["PYTHONWARNINGS"] = ('ignore:Objective did not converge:ConvergenceWarning:')

class SLIMElasticNetRecommender(BaseItemSimilarityMatrixRecommender):
    """
    Train a Sparse Linear Methods (SLIM) item similarity model.
    NOTE: ElasticNet solver is parallel, a single intance of SLIM_ElasticNet will
          make use of half the cores available
    See:
        Efficient Top-N Recommendation by Linear Regression,
        M. Levy and K. Jack, LSRS workshop at RecSys 2013.
        SLIM: Sparse linear methods for top-n recommender systems,
        X. Ning and G. Karypis, ICDM 2011.
        http://glaros.dtc.umn.edu/gkhome/fetch/papers/SLIM2011icdm.pdf
    """

    RECOMMENDER_NAME = "SLIMElasticNetRecommender"

    def __init__(self, URM_train, verbose = True):
        super(SLIMElasticNetRecommender, self).__init__(URM_train, verbose = verbose)

    @ignore_warnings(category=ConvergenceWarning)
    def fit(self, l1_ratio=0.1, alpha = 1.0, positive_only=True, topK = 100,**earlystopping_kwargs):

        assert l1_ratio>= 0 and l1_ratio<=1, "{}: l1_ratio must be between 0 and 1, provided value was {}".format(self.RECOMMENDER_NAME, l1_ratio)

        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK


        # initialize the ElasticNet model
        self.model = ElasticNet(alpha=alpha,
                                l1_ratio=self.l1_ratio,
                                positive=self.positive_only,
                                fit_intercept=False,
                                copy_X=False,
                                precompute=True,
                                selection='random',
                                max_iter=100,
                                tol=1e-4)

        URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)

        n_items = URM_train.shape[1]

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        numCells = 0

        start_time = time.time()
        start_time_printBatch = start_time

        # fit each item's factors sequentially (not in parallel)
        for currentItem in range(n_items):

            # get the target column
            y = URM_train[:, currentItem].toarray()

            # set the j-th column of X to zero
            start_pos = URM_train.indptr[currentItem]
            end_pos = URM_train.indptr[currentItem + 1]

            current_item_data_backup = URM_train.data[start_pos: end_pos].copy()
            URM_train.data[start_pos: end_pos] = 0.0

            # fit one ElasticNet model per column
            self.model.fit(URM_train, y)

            # self.model.coef_ contains the coefficient of the ElasticNet model
            # let's keep only the non-zero values

            # Select topK values
            # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
            # - Partition the data to extract the set of relevant items
            # - Sort only the relevant items
            # - Get the original item index

            nonzero_model_coef_index = self.model.sparse_coef_.indices
            nonzero_model_coef_value = self.model.sparse_coef_.data

            local_topK = min(len(nonzero_model_coef_value)-1, self.topK)

            relevant_items_partition = (-nonzero_model_coef_value).argpartition(local_topK)[0:local_topK]
            relevant_items_partition_sorting = np.argsort(-nonzero_model_coef_value[relevant_items_partition])
            ranking = relevant_items_partition[relevant_items_partition_sorting]

            for index in range(len(ranking)):

                if numCells == len(rows):
                    rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))


                rows[numCells] = nonzero_model_coef_index[ranking[index]]
                cols[numCells] = currentItem
                values[numCells] = nonzero_model_coef_value[ranking[index]]

                numCells += 1

            # finally, replace the original values of the j-th column
            URM_train.data[start_pos:end_pos] = current_item_data_backup

            elapsed_time = time.time() - start_time
            new_time_value, new_time_unit = seconds_to_biggest_unit(elapsed_time)


            if time.time() - start_time_printBatch > 300 or currentItem == n_items-1:
                self._print("Processed {} ({:4.1f}%) in {:.2f} {}. Items per second: {:.2f}".format(
                    currentItem+1,
                    100.0* float(currentItem+1)/n_items,
                    new_time_value,
                    new_time_unit,
                    float(currentItem)/elapsed_time))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        # generate the sparse weight matrix
        self.W_sparse = sps.csr_matrix((values[:numCells], (rows[:numCells], cols[:numCells])),
                                       shape=(n_items, n_items), dtype=np.float32)

In [12]:
# create a Slim elasticnet object
slim_en = SLIMElasticNetRecommender(URM_train)
slim_en.fit(epochs = 500, l1_ratio = 0.0023170159712850467, alpha = 0.09078974149197175, positive_only = True, topK = 363)

SLIMElasticNetRecommender: Processed 1490 ( 8.3%) in 5.00 min. Items per second: 4.96
SLIMElasticNetRecommender: Processed 2983 (16.5%) in 10.00 min. Items per second: 4.97
SLIMElasticNetRecommender: Processed 4486 (24.8%) in 15.00 min. Items per second: 4.98
SLIMElasticNetRecommender: Processed 5988 (33.2%) in 20.01 min. Items per second: 4.99
SLIMElasticNetRecommender: Processed 7468 (41.4%) in 25.01 min. Items per second: 4.98
SLIMElasticNetRecommender: Processed 8962 (49.6%) in 30.01 min. Items per second: 4.98
SLIMElasticNetRecommender: Processed 10441 (57.8%) in 35.01 min. Items per second: 4.97
SLIMElasticNetRecommender: Processed 11913 (66.0%) in 40.01 min. Items per second: 4.96
SLIMElasticNetRecommender: Processed 13395 (74.2%) in 45.02 min. Items per second: 4.96
SLIMElasticNetRecommender: Processed 14874 (82.4%) in 50.02 min. Items per second: 4.96
SLIMElasticNetRecommender: Processed 16331 (90.4%) in 55.02 min. Items per second: 4.95
SLIMElasticNetRecommender: Processed 17

In [13]:
evaluator_valid.evaluateRecommender(slim_en)

EvaluatorHoldout: Processed 13646 (100.0%) in 26.93 sec. Users per second: 507


(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
 cutoff                                                                      
 10      0.399399                 0.401148  0.071367  0.247228    0.248054   
 
              MRR      NDCG        F1 HIT_RATE ARHR_ALL_HITS  ...  \
 cutoff                                                       ...   
 10      0.645532  0.413152  0.121096  0.97208      1.240785  ...   
 
        COVERAGE_ITEM_CORRECT COVERAGE_USER COVERAGE_USER_CORRECT  \
 cutoff                                                             
 10                  0.050944      0.999707              0.971795   
 
        DIVERSITY_GINI SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL  \
 cutoff                                                             
 10           0.010592         7.84549                   0.991782   
 
        RATIO_DIVERSITY_GINI RATIO_SHANNON_ENTROPY RATIO_AVERAGE_POPULARITY  \
 cutoff                                                   

In [14]:
# explore scores differences between the two models
user_id= 50
item_scores_slim_en = slim_en._compute_item_score(user_id)
item_scores_slim_en

array([[0.00012783, 0.00076198, 0.00099631, ..., 0.        , 0.        ,
        0.00071903]], dtype=float32)

In [15]:
item_scores_IALS = mf_ials._compute_item_score(user_id)
item_scores_IALS

array([-0.00183975, -0.00347735,  0.00030257, ...,  0.00323762,
        0.00281395, -0.01231717])

In [20]:
# create a recommender object which performs the hybridation of the 2 models
from Recommenders.BaseRecommender import BaseRecommender

class ScoresHybridRecommender(BaseRecommender):
    """ ScoresHybridRecommender
    Hybrid of two prediction scores R = R1*alpha + R2*(1-alpha)

    """

    RECOMMENDER_NAME = "ScoresHybridRecommender"

    def __init__(self, URM_train, recommender_1, recommender_2):
        super(ScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        
        
    def fit(self, alpha = 0.5):
        self.alpha = alpha      


    def _compute_item_score(self, user_id_array, items_to_compute):
        
        # In a simple extension this could be a loop over a list of pretrained recommender objects
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        item_weights = item_weights_1*self.alpha + item_weights_2*(1-self.alpha)

        return item_weights

In [21]:
# grid search over different values of hyperparameter alpha

for alpha in np.arange(0,1,0.1):
    scoreshybridrecommender = ScoresHybridRecommender(URM_train, slim_en, mf_ials)
    scoreshybridrecommender.fit(alpha = alpha)
    print("Hybrid model with alpha weight:")
    print(alpha)
    print(evaluator_valid.evaluateRecommender(scoreshybridrecommender))

Hybrid model with alpha weight:
0.0
EvaluatorHoldout: Processed 13646 (100.0%) in 28.71 sec. Users per second: 475
(       PRECISION PRECISION_RECALL_MIN_DEN    RECALL       MAP MAP_MIN_DEN  \
cutoff                                                                      
10      0.377327                 0.378693  0.065702  0.230622    0.231246   

             MRR      NDCG        F1  HIT_RATE ARHR_ALL_HITS  ...  \
cutoff                                                        ...   
10      0.631976  0.392387  0.111917  0.963066       1.18543  ...   

       COVERAGE_ITEM_CORRECT COVERAGE_USER COVERAGE_USER_CORRECT  \
cutoff                                                             
10                  0.039592      0.999707              0.962784   

       DIVERSITY_GINI SHANNON_ENTROPY RATIO_DIVERSITY_HERFINDAHL  \
cutoff                                                             
10           0.010426        7.930212                   0.993528   

       RATIO_DIVERSITY_GINI RATIO_

LET'S TRY TO NORMALIZE SCORES

In [16]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class DifferentLossScoresHybridRecommender(BaseRecommender):
    """ ScoresHybridRecommender
    Hybrid of two prediction scores R = R1/norm*alpha + R2/norm*(1-alpha) where R1 and R2 come from
    algorithms trained on different loss functions.

    """

    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2):
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        
        
        
    def fit(self, norm, alpha = 0.5):

        self.alpha = alpha
        self.norm = norm


    def _compute_item_score(self, user_id_array, items_to_compute):
        
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)

        norm_item_weights_1 = LA.norm(item_weights_1, self.norm)
        norm_item_weights_2 = LA.norm(item_weights_2, self.norm)
        
        
        if norm_item_weights_1 == 0:
            raise ValueError("Norm {} of item weights for recommender 1 is zero. Avoiding division by zero".format(self.norm))
        
        if norm_item_weights_2 == 0:
            raise ValueError("Norm {} of item weights for recommender 2 is zero. Avoiding division by zero".format(self.norm))
        
        item_weights = item_weights_1 / norm_item_weights_1 * self.alpha + item_weights_2 / norm_item_weights_2 * (1-self.alpha)

        return item_weights

In [18]:
recommender_object = DifferentLossScoresHybridRecommender(URM_train, slim_en, mf_ials)

best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "norm" : 0
}

for norm in [1, 2]:
    for alpha in np.arange(0,1,0.1):
        print("----")
        recommender_object.fit(norm, alpha)

        result_df, _ = evaluator_valid.evaluateRecommender(recommender_object)
        print("Norm: {}, Alpha: {}, Result: {}".format(norm, alpha, result_df.loc[10]["MAP"]))

        if result_df.loc[10]["MAP"] > best_model["MAP"]:
            best_model["MAP"] = result_df.loc[10]["MAP"]
            best_model["alpha"] = alpha
            best_model["norm"] = norm
print("----")
print("Best model has MAP: {} with alpha: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["norm"]))

----
EvaluatorHoldout: Processed 13646 (100.0%) in 31.22 sec. Users per second: 437
Norm: 1, Alpha: 0.0, Result: 0.2306217561283151
----
EvaluatorHoldout: Processed 13646 (100.0%) in 32.09 sec. Users per second: 425
Norm: 1, Alpha: 0.1, Result: 0.23354907479137998
----
EvaluatorHoldout: Processed 13646 (100.0%) in 32.00 sec. Users per second: 426
Norm: 1, Alpha: 0.2, Result: 0.23613324388331675
----
EvaluatorHoldout: Processed 13646 (100.0%) in 31.40 sec. Users per second: 435
Norm: 1, Alpha: 0.30000000000000004, Result: 0.23912988921691034
----
EvaluatorHoldout: Processed 13646 (100.0%) in 31.57 sec. Users per second: 432
Norm: 1, Alpha: 0.4, Result: 0.241609751331282
----
EvaluatorHoldout: Processed 13646 (100.0%) in 31.91 sec. Users per second: 428
Norm: 1, Alpha: 0.5, Result: 0.243610805771328
----
EvaluatorHoldout: Processed 13646 (100.0%) in 32.02 sec. Users per second: 426
Norm: 1, Alpha: 0.6000000000000001, Result: 0.2449699981854091
----
EvaluatorHoldout: Processed 13646 (100.

In [19]:
recommender_object = DifferentLossScoresHybridRecommender(URM_train, slim_en, mf_ials)

best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "norm" : 0
}

for norm in [2]:
    for alpha in np.arange(0.85,0.95,0.01):
        print("----")
        recommender_object.fit(norm, alpha)

        result_df, _ = evaluator_valid.evaluateRecommender(recommender_object)
        print("Norm: {}, Alpha: {}, Result: {}".format(norm, alpha, result_df.loc[10]["MAP"]))

        if result_df.loc[10]["MAP"] > best_model["MAP"]:
            best_model["MAP"] = result_df.loc[10]["MAP"]
            best_model["alpha"] = alpha
            best_model["norm"] = norm
print("----")
print("Best model has MAP: {} with alpha: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["norm"]))

----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.56 min. Users per second: 89
Norm: 2, Alpha: 0.85, Result: 0.24742979802209672
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.55 min. Users per second: 89
Norm: 2, Alpha: 0.86, Result: 0.2473542424200131
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.53 min. Users per second: 90
Norm: 2, Alpha: 0.87, Result: 0.24726825582937406
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.53 min. Users per second: 90
Norm: 2, Alpha: 0.88, Result: 0.2473814380166071
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.56 min. Users per second: 89
Norm: 2, Alpha: 0.89, Result: 0.24733452328608485
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.75 min. Users per second: 83
Norm: 2, Alpha: 0.9, Result: 0.24737055338037323
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.53 min. Users per second: 90
Norm: 2, Alpha: 0.91, Result: 0.24745395185286087
----
EvaluatorHoldout: Processed 13646 (100.0%) in 2.57 min. Users per second: 

In [23]:
#fit the IALS model with URM_all
mf_ials_all = IALSRecommender(URM_all)
mf_ials_all.fit(num_factors = 31,
            confidence_scaling= 'log',
            alpha = 0.0024941846820976015,
            epsilon = 3.449297756742473,
            reg = 5.61162089901928e-05,
            epochs = 40)

IALSRecommender: Epoch 1 of 40. Elapsed time 8.24 sec
IALSRecommender: Epoch 2 of 40. Elapsed time 17.71 sec
IALSRecommender: Epoch 3 of 40. Elapsed time 25.92 sec
IALSRecommender: Epoch 4 of 40. Elapsed time 34.34 sec
IALSRecommender: Epoch 5 of 40. Elapsed time 42.79 sec
IALSRecommender: Epoch 6 of 40. Elapsed time 52.44 sec
IALSRecommender: Epoch 7 of 40. Elapsed time 1.01 min
IALSRecommender: Epoch 8 of 40. Elapsed time 1.15 min
IALSRecommender: Epoch 9 of 40. Elapsed time 1.29 min
IALSRecommender: Epoch 10 of 40. Elapsed time 1.45 min
IALSRecommender: Epoch 11 of 40. Elapsed time 1.59 min
IALSRecommender: Epoch 12 of 40. Elapsed time 1.73 min
IALSRecommender: Epoch 13 of 40. Elapsed time 1.87 min
IALSRecommender: Epoch 14 of 40. Elapsed time 2.03 min
IALSRecommender: Epoch 15 of 40. Elapsed time 2.16 min
IALSRecommender: Epoch 16 of 40. Elapsed time 2.29 min
IALSRecommender: Epoch 17 of 40. Elapsed time 2.43 min
IALSRecommender: Epoch 18 of 40. Elapsed time 2.59 min
IALSRecommende

In [24]:
# create a Slim elasticnet trained with URM_all
slim_en_all = SLIMElasticNetRecommender(URM_all)
slim_en_all.fit(epochs = 500, l1_ratio = 0.0023170159712850467, alpha = 0.09078974149197175, positive_only = True, topK = 363)

SLIMElasticNetRecommender: Processed 1140 ( 6.3%) in 5.00 min. Items per second: 3.80
SLIMElasticNetRecommender: Processed 2285 (12.7%) in 10.00 min. Items per second: 3.81
SLIMElasticNetRecommender: Processed 3439 (19.0%) in 15.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 4582 (25.4%) in 20.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 5728 (31.7%) in 25.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 6874 (38.1%) in 30.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 8017 (44.4%) in 35.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 9168 (50.8%) in 40.01 min. Items per second: 3.82
SLIMElasticNetRecommender: Processed 10303 (57.1%) in 45.02 min. Items per second: 3.81
SLIMElasticNetRecommender: Processed 11439 (63.3%) in 50.02 min. Items per second: 3.81
SLIMElasticNetRecommender: Processed 12566 (69.6%) in 55.02 min. Items per second: 3.81
SLIMElasticNetRecommender: Processed 1365

In [25]:
recommender = DifferentLossScoresHybridRecommender(URM_all, slim_en_all, mf_ials_all)
recommender.fit(norm=2 , alpha=0.91)

# Create final recommendations

In [None]:
#let's use the hybrid model for final recommandations with the alpha tuned (NO NORMALIZATION)

#recommender = ScoresHybridRecommender(URM_all, slim_en, mf_ials)
#recommender.fit(alpha = 0.7924)
#evaluator_valid.evaluateRecommender(recommender)

In [26]:
test_users = pd.read_csv('../input/recommender-system-2021-challenge-polimi/data_target_users_test.csv')
test_users

Unnamed: 0,user_id
0,0
1,1
2,2
3,3
4,4
...,...
13645,13645
13646,13646
13647,13647
13648,13648


In [27]:
user_id = test_users['user_id']
recommendations = []
for user in user_id:
    recommendations.append(recommender.recommend(user,cutoff = 10))

In [28]:
for index in range(len(recommendations)):
    recommendations[index]=np.array(recommendations[index])
    
test_users['item_list']= recommendations
test_users['item_list'] = pd.DataFrame([str(line).strip('[').strip(']').replace("'","") for line in test_users['item_list']])
test_users.to_csv('submission.csv', index=False)