In [3]:
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import pandas as pd
import scipy.sparse as sps
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.BaseRecommender import BaseRecommender
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
import time


<h3 style="color:yellow;background-color:blue">Load all the input using the data loader.</h3>

In [4]:
from data_loader import load_URM, load_ICM

# User-item interactions used for training and evaluation.
URM_all = load_URM('input/data_train.csv')

# One item-content matrix per feature file, loaded in the same order as the names on the left.
ICM_genres, ICM_subgenres, ICM_event, ICM_channel = map(load_ICM, (
    'input/data_ICM_genre.csv',
    'input/data_ICM_subgenre.csv',
    'input/data_ICM_event.csv',
    'input/data_ICM_channel.csv',
))

In [5]:
# Split URM_all in two with train_percentage=0.9; the second matrix is used as the test set.
# No separate validation split / validation evaluator is created in this notebook.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all,
    train_percentage=0.9,
)

# Holdout evaluator on the test matrix, reporting metrics at cutoff 10.
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 13611 ( 0.3%) Users that have less than 1 test interactions


#### Approach 1: BPR

In [None]:
# Baseline 1: LightFM trained on the interactions only, with the BPR loss.
model = LightFM(learning_rate=0.05, loss='bpr')
model.fit(URM_train, epochs=10)

# Training-set metrics: how well the model ranks the interactions it was fitted on.
train_precision = precision_at_k(model, URM_train, k=10).mean()
train_auc = auc_score(model, URM_train).mean()

# Test-set metrics: pass the training interactions so LightFM leaves the known
# training positives out of the ranking. Without this the test scores are
# penalised for ranking already-seen items highly, and they are not comparable
# with the final WARP cell, which already passes train_interactions.
test_precision = precision_at_k(model, URM_test, train_interactions=URM_train, k=10).mean()
test_auc = auc_score(model, URM_test, train_interactions=URM_train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

#### Approach 2: WARP

In [None]:
# Baseline 2: same setup as the BPR baseline, but trained with the WARP loss.
model = LightFM(learning_rate=0.05, loss='warp')

# fit_partial on a freshly created model: training starts from the initial state.
model.fit_partial(URM_train, epochs=10)

# Training-set metrics: how well the model ranks the interactions it was fitted on.
train_precision = precision_at_k(model, URM_train, k=10).mean()
train_auc = auc_score(model, URM_train).mean()

# Test-set metrics: pass the training interactions so LightFM leaves the known
# training positives out of the ranking. Without this the test scores are
# penalised for ranking already-seen items highly, and they are not comparable
# with the final WARP cell, which already passes train_interactions.
test_precision = precision_at_k(model, URM_test, train_interactions=URM_train, k=10).mean()
test_auc = auc_score(model, URM_test, train_interactions=URM_train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

<h3 style="color:yellow;background-color:blue">Approach 2 seems to be more effective in both Precision and AUC, so we prefer the WARP loss.</h3>

# Using different learning schedules
`lightfm` implements two learning schedules: <b style="color:yellow">adagrad</b> and <b style="color:yellow">adadelta</b>. Neither is clearly superior, and, like other hyperparameter choices, the best learning schedule will differ based on the problem at hand.

<h3 style="color:yellow;background-color:blue">We have to try both schedules and see which one performs better.</h3>
<h4 style="color:yellow;background-color:blue">How? By comparing two models trained with the same number of epochs and the same loss function, using ROC AUC on the test set.</h4>

In [None]:
from tqdm import tqdm

# Settings shared by both models so that only the learning schedule differs.
alpha = 1e-3
epochs = 10
shared_model_kwargs = dict(no_components=30,
                           loss='warp',
                           user_alpha=alpha,
                           item_alpha=alpha)

adagrad_model = LightFM(learning_schedule='adagrad', **shared_model_kwargs)
adadelta_model = LightFM(learning_schedule='adadelta', **shared_model_kwargs)

# Test ROC AUC after each epoch, one list per schedule (plotted in the next cell).
adagrad_auc = []
adadelta_auc = []

for schedule_model, auc_history, progress_label in (
        (adagrad_model, adagrad_auc, "fitting adagrad"),
        (adadelta_model, adadelta_auc, "fitting adadelta")):
    for epoch in tqdm(range(epochs), desc=progress_label):
        # One more epoch on top of the current state, then score the model.
        schedule_model.fit_partial(URM_train, epochs=1)
        # train_interactions keeps the known training positives out of the
        # ranking, matching how the final WARP model is scored later on.
        auc_history.append(
            auc_score(schedule_model, URM_test, train_interactions=URM_train).mean()
        )

In [None]:
# Test AUC per epoch for the two learning schedules (WARP loss).
x = np.arange(len(adagrad_auc))

fig, ax = plt.subplots()
ax.plot(x, np.array(adagrad_auc), label='adagrad')
ax.plot(x, np.array(adadelta_auc), label='adadelta')
# Title and axis labels let the figure stand on its own and distinguish it
# from the otherwise identical k-OS plot further down.
ax.set(title='Test ROC AUC per epoch (WARP loss)',
       xlabel='Epoch index (0-based)',
       ylabel='Mean test AUC')
ax.legend(loc='lower right')
plt.show()

<h3 style="color:yellow;background-color:blue">Adagrad seems to be way better than Adadelta, so we prefer to use it</h3>

We can try the same for the k-OS loss.

In [None]:
# Learning-schedule comparison repeated with the k-OS WARP loss.
# NOTE: `alpha`, `epochs` and `tqdm` are the names defined by the previous
# comparison cell; run that cell first.
kos_model_kwargs = dict(no_components=30,
                        loss='warp-kos',
                        user_alpha=alpha,
                        item_alpha=alpha)

adagrad_model = LightFM(learning_schedule='adagrad', **kos_model_kwargs)
adadelta_model = LightFM(learning_schedule='adadelta', **kos_model_kwargs)

# Test ROC AUC after each epoch, one list per schedule (plotted in the next cell).
adagrad_auc = []
adadelta_auc = []

for schedule_model, auc_history, progress_label in (
        (adagrad_model, adagrad_auc, "fitting adagrad (k-os)"),
        (adadelta_model, adadelta_auc, "fitting adadelta (k-os)")):
    for epoch in tqdm(range(epochs), desc=progress_label):
        # One more epoch on top of the current state, then score the model.
        schedule_model.fit_partial(URM_train, epochs=1)
        # train_interactions keeps the known training positives out of the
        # ranking, matching how the final WARP model is scored later on.
        auc_history.append(
            auc_score(schedule_model, URM_test, train_interactions=URM_train).mean()
        )

In [None]:
# Test AUC per epoch for the two learning schedules (WARP-kos loss).
x = np.arange(len(adagrad_auc))

fig, ax = plt.subplots()
ax.plot(x, np.array(adagrad_auc), label='adagrad')
ax.plot(x, np.array(adadelta_auc), label='adadelta')
# Title and axis labels let the figure stand on its own and distinguish it
# from the otherwise identical plot of the plain WARP comparison.
ax.set(title='Test ROC AUC per epoch (WARP-kos loss)',
       xlabel='Epoch index (0-based)',
       ylabel='Mean test AUC')
ax.legend(loc='lower right')
plt.show()

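<h3 style="color:yellow;background-color:blue">WARP-kos (k-th order statistic WARP) is a variant of WARP that uses the k-th positive example of each user as the basis for its pairwise updates. Adadelta still performs worse than Adagrad with this loss too, so we will stick to Adagrad.</h3>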

<h2 style="color:yellow;background-color:darkgreen"> RECAP: we will use WARP as loss function and Adagrad as schedule</h2>

MEMO: the `max_sampled` argument can be useful if training takes too long (a lower `max_sampled` means shorter training time, but lower accuracy).

In [None]:
# Chosen configuration (WARP loss + adagrad): train epoch by epoch while
# recording how long each epoch takes and the test AUC reached after it.
# NOTE: this cell rebinds `alpha` and `epochs`, which the schedule-comparison
# cells also read.
alpha = 1e-05
epochs = 70
num_components = 32

warp_model = LightFM(no_components=num_components,
                     max_sampled=3,
                     loss='warp',
                     learning_schedule='adagrad',
                     user_alpha=alpha,
                     item_alpha=alpha)

warp_duration = []
warp_auc = []

for epoch in tqdm(range(epochs), desc="fitting WARP model"):
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    warp_model.fit_partial(URM_train, epochs=1)
    warp_duration.append(time.perf_counter() - start)
    warp_auc.append(auc_score(warp_model, URM_test, train_interactions=URM_train).mean())

x = np.arange(epochs)

# Seconds spent in fit_partial for each epoch.
fig, ax = plt.subplots()
ax.plot(x, np.array(warp_duration), label='WARP duration')
ax.set(title='Duration', xlabel='Epoch index (0-based)', ylabel='Seconds per epoch')
ax.legend(loc='upper right')
plt.show()

# Mean test AUC measured after each epoch.
fig, ax = plt.subplots()
ax.plot(x, np.array(warp_auc), label='WARP AUC')
ax.set(title='AUC', xlabel='Epoch index (0-based)', ylabel='Mean test AUC')
ax.legend(loc='upper right')
plt.show()

In [None]:
# In order to evaluate put it in a recommender class
class LightFMWrapper(BaseRecommender):
    """Adapter exposing a LightFM model (WARP loss, adagrad schedule) as a BaseRecommender.

    Only the user-item interactions are used; no user or item features are passed
    to LightFM. Wrapping the model this way lets it be scored with the project's
    evaluator, which calls `_compute_item_score`.
    """

    RECOMMENDER_NAME = "LightFMWrapper"

    def __init__(self, URM_train):
        """Store the training interactions through the BaseRecommender constructor."""
        super(LightFMWrapper, self).__init__(URM_train)


    def fit(self, ITEM_ALPHA, USER_ALPHA, MAX_SAMPLED, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):
        """Create and train the underlying LightFM model.

        Args:
            ITEM_ALPHA: forwarded to LightFM as `item_alpha`.
            USER_ALPHA: forwarded to LightFM as `user_alpha`.
            MAX_SAMPLED: forwarded to LightFM as `max_sampled`.
            NUM_COMPONENTS: forwarded to LightFM as `no_components`.
            NUM_EPOCHS: number of training epochs.
            NUM_THREADS: number of threads used by LightFM while fitting.
        """
        self.lightFM_model = LightFM(loss='warp',
                                     learning_schedule='adagrad',
                                     item_alpha=ITEM_ALPHA,
                                     user_alpha=USER_ALPHA,
                                     max_sampled=MAX_SAMPLED,
                                     no_components=NUM_COMPONENTS)

        # Train on the matrix this instance was built with, not on whatever the
        # notebook-level name `URM_train` happens to point to when fit() runs.
        # NOTE(review): relies on BaseRecommender.__init__ keeping the matrix as
        # self.URM_train -- confirm against the base class.
        self.lightFM_model = self.lightFM_model.fit(self.URM_train,
                                                    epochs=NUM_EPOCHS,
                                                    num_threads=NUM_THREADS)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return a (len(user_id_array), n_items) array of predicted scores.

        Args:
            user_id_array: ids of the users to score.
            items_to_compute: optional item ids to restrict the scoring to; when
                None every item is scored.

        Returns:
            Array with one row per requested user. Columns of items that were not
            scored stay at -inf.
        """
        if items_to_compute is None:
            item_ids = np.arange(self.n_items)
        else:
            item_ids = np.asarray(items_to_compute)

        item_scores = np.full((len(user_id_array), self.n_items), -np.inf)

        # Nothing to score: every item keeps its -inf placeholder.
        if item_ids.size == 0:
            return item_scores

        for user_index, user_id in enumerate(user_id_array):
            # Write only into the scored columns: predict() returns one value per
            # requested item, which does not fill a whole n_items row when a
            # subset of the items is requested.
            item_scores[user_index, item_ids] = self.lightFM_model.predict(int(user_id),
                                                                           item_ids)

        return item_scores


In [1]:
# Values handed to LightFMWrapper.fit() in the cells that follow.
ITEM_ALPHA = 1e-6
USER_ALPHA = 1e-6
MAX_SAMPLED = 3
NUM_COMPONENTS = 10
NUM_EPOCHS = 10
# Thread count used while fitting; it can be raised when more
# physical cores are available.
NUM_THREADS = 4

In [None]:
# Wrap the training interactions and train with the notebook-level hyper-parameters.
recommender = LightFMWrapper(URM_train)
recommender.fit(
    ITEM_ALPHA=ITEM_ALPHA,
    USER_ALPHA=USER_ALPHA,
    MAX_SAMPLED=MAX_SAMPLED,
    NUM_COMPONENTS=NUM_COMPONENTS,
    NUM_EPOCHS=NUM_EPOCHS,
    NUM_THREADS=NUM_THREADS,
)

In [None]:
# evaluateRecommender returns a pair; only its first element (the metrics) is kept.
result_dict, _ = evaluator_test.evaluateRecommender(recommender)
# Bare last expression so the notebook renders the metrics as the cell output.
result_dict

<h3 style="color:yellow;background-color:blue"> Nice, now let's try with some item features </h3>

In [None]:
class LightFMWrapper(BaseRecommender):
    """Adapter exposing a LightFM model (WARP loss, adagrad schedule) as a BaseRecommender.

    Optionally passes an item-content matrix (ICM) to LightFM as item features.
    This definition replaces the earlier notebook class with the same name.

    NOTE(review): the ICM is handed to LightFM as-is. LightFM's documentation
    suggests also including per-item (identity) features when supplying an item
    feature matrix -- worth checking if the results look poor.
    """

    RECOMMENDER_NAME = "LightFMWrapper"

    def __init__(self, URM_train, ICM_train=None):
        """Store the training interactions and, when given, a private copy of the ICM.

        Args:
            URM_train: training user-item interactions, passed to BaseRecommender.
            ICM_train: optional item feature matrix; None means no item features
                are passed to LightFM.
        """
        super(LightFMWrapper, self).__init__(URM_train)

        # The default None used to crash on .copy(); keep None to mean "no features".
        self.ICM_train = None if ICM_train is None else ICM_train.copy()


    def fit(self, ITEM_ALPHA, USER_ALPHA, MAX_SAMPLED, NUM_COMPONENTS, NUM_EPOCHS, NUM_THREADS):
        """Create and train the underlying LightFM model.

        Args:
            ITEM_ALPHA: forwarded to LightFM as `item_alpha`.
            USER_ALPHA: forwarded to LightFM as `user_alpha`.
            MAX_SAMPLED: forwarded to LightFM as `max_sampled`.
            NUM_COMPONENTS: forwarded to LightFM as `no_components`.
            NUM_EPOCHS: number of training epochs.
            NUM_THREADS: number of threads used by LightFM while fitting.
        """
        self.lightFM_model = LightFM(loss='warp',
                                     learning_schedule='adagrad',
                                     item_alpha=ITEM_ALPHA,
                                     user_alpha=USER_ALPHA,
                                     max_sampled=MAX_SAMPLED,
                                     no_components=NUM_COMPONENTS)

        # Train on the matrix this instance was built with, not on whatever the
        # notebook-level name `URM_train` happens to point to when fit() runs.
        # NOTE(review): relies on BaseRecommender.__init__ keeping the matrix as
        # self.URM_train -- confirm against the base class.
        self.lightFM_model = self.lightFM_model.fit(self.URM_train,
                                                    item_features=self.ICM_train,
                                                    epochs=NUM_EPOCHS,
                                                    num_threads=NUM_THREADS)


    def _compute_item_score(self, user_id_array, items_to_compute = None):
        """Return a (len(user_id_array), n_items) array of predicted scores.

        Args:
            user_id_array: ids of the users to score.
            items_to_compute: optional item ids to restrict the scoring to; when
                None every item is scored.

        Returns:
            Array with one row per requested user. Columns of items that were not
            scored stay at -inf.
        """
        if items_to_compute is None:
            item_ids = np.arange(self.n_items)
        else:
            item_ids = np.asarray(items_to_compute)

        item_scores = np.full((len(user_id_array), self.n_items), -np.inf)

        # Nothing to score: every item keeps its -inf placeholder.
        if item_ids.size == 0:
            return item_scores

        for user_index, user_id in enumerate(user_id_array):
            # Always pass the full feature matrix: LightFM uses each item id as a
            # row index into item_features, so slicing the rows while keeping the
            # original ids would look up the wrong (or missing) rows.
            # Write only into the scored columns, since predict() returns one
            # value per requested item rather than a whole n_items row.
            item_scores[user_index, item_ids] = self.lightFM_model.predict(int(user_id),
                                                                           item_ids,
                                                                           item_features = self.ICM_train)

        return item_scores


In [None]:
# Same wrapper, this time with the sub-genre ICM supplied as item features.
recommender = LightFMWrapper(URM_train, ICM_train=ICM_subgenres)
recommender.fit(
    ITEM_ALPHA=ITEM_ALPHA,
    USER_ALPHA=USER_ALPHA,
    MAX_SAMPLED=MAX_SAMPLED,
    NUM_COMPONENTS=NUM_COMPONENTS,
    NUM_EPOCHS=NUM_EPOCHS,
    NUM_THREADS=NUM_THREADS,
)

# Keep the first element returned by the evaluator and show it as the cell output.
result_dict, _ = evaluator_test.evaluateRecommender(recommender)
result_dict

<h3 style="color:yellow;background-color:blue"> The results with this wrapper are poor. However, the project already provides pre-built classes in the <code>Recommenders.FactorizationMachines.LightFMRecommender</code> module, so let's try using them. </h3>

In [16]:
from Recommenders.FactorizationMachines.LightFMRecommender import LightFMUserHybridRecommender, LightFMItemHybridRecommender

# Hyper-parameters for the pre-built hybrid recommender (these rebind the notebook-level names).
ITEM_ALPHA = 1e-6
USER_ALPHA = 1e-6
NUM_COMPONENTS = 10
NUM_EPOCHS = 1000

# Item-hybrid LightFM recommender built from the training interactions and the sub-genre ICM.
recommender = LightFMItemHybridRecommender(URM_train, ICM_subgenres)
recommender.fit(
    epochs=NUM_EPOCHS,
    loss='warp',
    sgd_mode='adagrad',
    n_components=NUM_COMPONENTS,
    user_alpha=USER_ALPHA,
    item_alpha=ITEM_ALPHA,
)

LightFMItemHybridRecommender: ICM Detected 487 ( 2.7%) items with no features.
LightFMItemHybridRecommender: Epoch 1 of 1000. Elapsed time 4.93 sec
LightFMItemHybridRecommender: Epoch 2 of 1000. Elapsed time 9.68 sec
LightFMItemHybridRecommender: Epoch 3 of 1000. Elapsed time 14.38 sec
LightFMItemHybridRecommender: Epoch 4 of 1000. Elapsed time 19.07 sec
LightFMItemHybridRecommender: Epoch 5 of 1000. Elapsed time 23.77 sec
LightFMItemHybridRecommender: Epoch 6 of 1000. Elapsed time 28.43 sec
LightFMItemHybridRecommender: Epoch 7 of 1000. Elapsed time 33.11 sec
LightFMItemHybridRecommender: Epoch 8 of 1000. Elapsed time 37.79 sec
LightFMItemHybridRecommender: Epoch 9 of 1000. Elapsed time 42.44 sec
LightFMItemHybridRecommender: Epoch 10 of 1000. Elapsed time 47.08 sec
LightFMItemHybridRecommender: Epoch 11 of 1000. Elapsed time 51.71 sec
LightFMItemHybridRecommender: Epoch 12 of 1000. Elapsed time 56.32 sec
LightFMItemHybridRecommender: Epoch 13 of 1000. Elapsed time 1.02 min
LightFMIte

In [17]:
# evaluateRecommender returns a pair; only its first element (the metrics) is kept.
result_dict, _ = evaluator_test.evaluateRecommender(recommender)
# Bare last expression so the notebook renders the metrics as the cell output.
result_dict

EvaluatorHoldout: Processed 13611 (100.0%) in 19.31 sec. Users per second: 705


Unnamed: 0_level_0,PRECISION,PRECISION_RECALL_MIN_DEN,RECALL,MAP,MAP_MIN_DEN,MRR,NDCG,F1,HIT_RATE,ARHR_ALL_HITS,NOVELTY,AVERAGE_POPULARITY,DIVERSITY_MEAN_INTER_LIST,DIVERSITY_HERFINDAHL,COVERAGE_ITEM,COVERAGE_ITEM_CORRECT,COVERAGE_USER,COVERAGE_USER_CORRECT,DIVERSITY_GINI,SHANNON_ENTROPY,RATIO_DIVERSITY_HERFINDAHL,RATIO_DIVERSITY_GINI,RATIO_SHANNON_ENTROPY,RATIO_AVERAGE_POPULARITY,RATIO_NOVELTY
cutoff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
10,0.206612,0.209924,0.066847,0.103718,0.105213,0.444993,0.220936,0.101013,0.85115,0.674413,0.005639,0.502656,0.930193,0.993012,0.095797,0.054654,0.997143,0.848718,0.01351,8.163994,0.993396,0.054474,0.658906,2.475037,0.023582


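<h3 style="color:yellow;background-color:blue"> It's better, but still not that good. I don't yet understand how I can pass multiple ICMs to the hybrid model. TODO: one option to try is concatenating them column-wise with <code>sps.hstack</code> (assuming they all share the same item indexing) and passing the result as a single ICM. </h3>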