In [12]:
from Data_Handler.DataReader import DataReader
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from hybrid import *
from Recommenders.SLIM.SLIMElasticNetRecommender import SLIMElasticNetRecommender
from Recommenders.GraphBased.RP3betaRecommender import RP3betaRecommender
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from tqdm import tqdm
from evaluator import evaluate
import pandas as pd
import numpy as np
from Evaluation.Evaluator import EvaluatorHoldout

In [13]:
dataReader = DataReader()
urm_aug = dataReader.load_augmented_binary_urm()
urm_pow = dataReader.load_powerful_binary_urm()
urm_aug, icm = dataReader.paddingICMandURM(urm_aug)
URM_train_aug, URM_valid_aug = split_train_in_two_percentage_global_sample(urm_aug, train_percentage = 0.9)
URM_train_pow, URM_valid_pow = split_train_in_two_percentage_global_sample(urm_pow, train_percentage = 0.9)

4877


In [14]:
evaluator_valid_aug = EvaluatorHoldout(URM_valid_aug, cutoff_list=[10])
evaluator_valid_pow = EvaluatorHoldout(URM_valid_pow, cutoff_list=[10])

EvaluatorHoldout: Ignoring 2769 ( 6.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 2890 ( 6.9%) Users that have less than 1 test interactions


In [15]:
SSLIM_rec = SLIMElasticNetRecommender(URM_train_pow)
RP3b_rec = RP3betaRecommender(URM_train_aug)
ItemKNNCF_rec = ItemKNNCFRecommender(URM_train_aug)
SSLIM_rec.fit(l1_ratio=0.007467817120176792,alpha=0.0016779515713674044, positive_only=True, topK=723)
RP3b_rec.fit(alpha=0.2686781702308662, beta=0.39113126168484014, topK=455, normalize_similarity=True)
ItemKNNCF_rec.fit(topK=1199, shrink=229.22107382005083,similarity='cosine', normalize=True, feature_weighting="TF-IDF")

SLIMElasticNetRecommender: URM Detected 307 ( 1.1%) items with no interactions.
RP3betaRecommender: URM Detected 3461 (12.4%) items with no interactions.
ItemKNNCFRecommender: URM Detected 3461 (12.4%) items with no interactions.
SLIMElasticNetRecommender: Processed 5894 (21.1%) in 5.00 min. Items per second: 19.64
SLIMElasticNetRecommender: Processed 12230 (43.7%) in 10.00 min. Items per second: 20.38
SLIMElasticNetRecommender: Processed 18555 (66.3%) in 15.00 min. Items per second: 20.61
SLIMElasticNetRecommender: Processed 24481 (87.5%) in 20.00 min. Items per second: 20.40
SLIMElasticNetRecommender: Processed 27968 (100.0%) in 23.03 min. Items per second: 20.24
RP3betaRecommender: Similarity column 27968 (100.0%), 1857.20 column/sec. Elapsed time 15.06 sec
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 27968 (100.0%), 640.23 column/sec. Elapsed time 43.68 sec


In [16]:
print("MAP of the starting models")

result_df, _ = evaluator_valid_pow.evaluateRecommender(SSLIM_rec)
print("SLIM ElasticNet - MAP: {}".format(result_df.loc[10]["MAP"]))

result_df, _ = evaluator_valid_aug.evaluateRecommender(RP3b_rec)
print("RP3beta - MAP: {}".format(result_df.loc[10]["MAP"]))

result_df, _ = evaluator_valid_aug.evaluateRecommender(ItemKNNCF_rec)
print("RP3beta - MAP: {}".format(result_df.loc[10]["MAP"]))

MAP of the starting models
EvaluatorHoldout: Processed 38744 (100.0%) in 32.98 sec. Users per second: 1175
SLIM ElasticNet - MAP: 0.019564035926504714
EvaluatorHoldout: Processed 38860 (100.0%) in 23.69 sec. Users per second: 1640
RP3beta - MAP: 0.016080503680285074
EvaluatorHoldout: Processed 38860 (100.0%) in 41.11 sec. Users per second: 945
RP3beta - MAP: 0.015318784770727775


In [17]:
from numpy import linalg as LA
from Recommenders.BaseRecommender import BaseRecommender

class DifferentLossScoresHybridRecommender(BaseRecommender):
    """ ScoresHybridRecommender
    Hybrid of three predictions scores
    R = R1*alpha + R2*beta + R3*(1-alpha-beta)
    
    Class from Dacrema exercise modified by Antonio Ercolani
    The original took as input 2 recommender

    """

    RECOMMENDER_NAME = "DifferentLossScoresHybridRecommender"


    def __init__(self, URM_train, recommender_1, recommender_2, recommender_3):
        super(DifferentLossScoresHybridRecommender, self).__init__(URM_train)

        self.URM_train = sps.csr_matrix(URM_train)
        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3
        
        
        
    def fit(self, norm, alpha = 0.5, beta = 0.5):

        self.alpha = alpha
        self.beta = beta
        self.norm = norm


    def _compute_item_score(self, user_id_array, items_to_compute):
        
        item_weights_1 = self.recommender_1._compute_item_score(user_id_array)
        item_weights_2 = self.recommender_2._compute_item_score(user_id_array)
        item_weights_3 = self.recommender_3._compute_item_score(user_id_array)

        norm_item_weights_1 = LA.norm(item_weights_1, self.norm)
        norm_item_weights_2 = LA.norm(item_weights_2, self.norm)
        norm_item_weights_3 = LA.norm(item_weights_3, self.norm)
        
        
        if norm_item_weights_1 == 0:
            raise ValueError("Norm {} of item weights for recommender 1 is zero. Avoiding division by zero".format(self.norm))
        
        if norm_item_weights_2 == 0:
            raise ValueError("Norm {} of item weights for recommender 2 is zero. Avoiding division by zero".format(self.norm))
            
        if norm_item_weights_3 == 0:
            raise ValueError("Norm {} of item weights for recommender 3 is zero. Avoiding division by zero".format(self.norm))
        
        item_weights = item_weights_1 / norm_item_weights_1 * self.alpha + item_weights_2 / norm_item_weights_2 * self.beta + item_weights_3 / norm_item_weights_3 * (1-self.alpha-self.beta)

        return item_weights

In [18]:
recommender_object = DifferentLossScoresHybridRecommender(URM_train_aug, SSLIM_rec, RP3b_rec, ItemKNNCF_rec)

best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "beta" : 0,
    "norm" : 0
}

for norm in [1,2]:
    for alpha in np.arange(0.0, 1.1, 0.1):
        for beta in np.arange(0.0, 1.1, 0.1):
            
            #truncate digits since np.arange sometimes doesn't
            alpha = round(alpha,1)
            beta = round(beta,1)
            
            
            #discard cases in which the sum is greater than 1 
            if ( (alpha+beta) <= 1): 
                theta = round(1-alpha-beta,1)
            
                print("----")
                recommender_object.fit(norm, alpha, beta)
                result_df, _ = evaluator_valid_aug.evaluateRecommender(recommender_object)
                print("Norm: {}, Alpha: {}, Beta: {}, Theta: {}, Result: {}".format(norm, alpha, beta, 1-alpha-beta, result_df.loc[10]["MAP"]))

                if result_df.loc[10]["MAP"] > best_model["MAP"]:
                    best_model["MAP"] = result_df.loc[10]["MAP"]
                    best_model["alpha"] = alpha
                    best_model["norm"] = norm

                    print("*** New best model found! ")
                    print("New best model has MAP: {} with alpha: {}, beta: {}, theta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"],
                                                                                                            1-best_model["alpha"]-best_model["beta"], best_model["norm"]))

print("----")
print("Best model has MAP: {} with alpha: {}, beta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"], best_model["norm"]))



DifferentLossScoresHybridRecommender: URM Detected 3461 (12.4%) items with no interactions.
----
EvaluatorHoldout: Processed 38860 (100.0%) in 1.11 min. Users per second: 586
Norm: 1, Alpha: 0.0, Beta: 0.0, Theta: 1.0, Result: 0.015318784770727775
*** New best model found! 
New best model has MAP: 0.015318784770727775 with alpha: 0.0, beta: 0, theta: 1.0, norm: 1
----
EvaluatorHoldout: Processed 38860 (100.0%) in 1.10 min. Users per second: 588
Norm: 1, Alpha: 0.0, Beta: 0.1, Theta: 0.9, Result: 0.015528719293516069
*** New best model found! 
New best model has MAP: 0.015528719293516069 with alpha: 0.0, beta: 0, theta: 1.0, norm: 1
----
EvaluatorHoldout: Processed 38860 (100.0%) in 1.10 min. Users per second: 587
Norm: 1, Alpha: 0.0, Beta: 0.2, Theta: 0.8, Result: 0.01569176388174075
*** New best model found! 
New best model has MAP: 0.01569176388174075 with alpha: 0.0, beta: 0, theta: 1.0, norm: 1
----
EvaluatorHoldout: Processed 38860 (100.0%) in 1.09 min. Users per second: 593
Norm:

In [None]:
#re-tune - Norm: , Alpha: 0.5, Beta: 0.5, Theta: 0, Result: 0.2505692650743614
#further exploration

best_model = {
    "MAP" : 0,
    "alpha" : 0,
    "beta" : 0,
    "norm" : 0
}

for norm in [1]:
    for alpha in np.arange(0.45, 0.55, 0.01):
        for beta in np.arange(0.45, 0.55, 0.01):
            
            
            #discard cases in which the sum is greater than 1 
            if ( (alpha+beta) <= 1): 
                theta = round(1-alpha-beta,1)
            
                print("----")
                recommender_object.fit(norm, alpha, beta)
                result_df, _ = evaluator_valid_aug.evaluateRecommender(recommender_object)
                print("Norm: {}, Alpha: {}, Beta: {}, Theta: {}, Result: {}".format(norm, alpha, beta, 1-alpha-beta, result_df.loc[10]["MAP"]))

                if result_df.loc[10]["MAP"] > best_model["MAP"]:
                    best_model["MAP"] = result_df.loc[10]["MAP"]
                    best_model["alpha"] = alpha
                    best_model["beta"] = beta
                    best_model["norm"] = norm

                    print("*** New best model found! ")
                    print("New best model has MAP: {} with alpha: {}, beta: {}, theta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"],
                                                                                                            1-best_model["alpha"]-best_model["beta"], best_model["norm"]))

print("----")
print("Best model has MAP: {} with alpha: {}, beta: {}, norm: {}".format(best_model["MAP"], best_model["alpha"], best_model["beta"], best_model["norm"]))