In [1]:
import gc
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.sparse import linalg
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics.pairwise import cosine_similarity

import utils
from FW_Similarity.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg
from Base.Evaluation.Evaluator import SequentialEvaluator
from Notebooks_utils.data_splitter import train_test_holdout
from SLIM_ElasticNet import SLIMElasticNetRecommender as SLIM

# NOTE(review): not referenced by any of the cells visible in this section —
# confirm whether it is still needed.
TEST_SET_THRESHOLD = 10
# Share reserved for the test split: the first train_test_holdout call is
# given train_perc = 1 - TEST_SET_HOLDOUT.
TEST_SET_HOLDOUT = 0.2


In [2]:
# Read the three input tables in one pass: track metadata, the training
# interactions, and the list of target playlists.
tracks, train, target = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tracks.csv',
        '../input/train.csv',
        '../input/target_playlists.csv',
    )
)

# Building the ICM and URM as CSR matrices

In [3]:
# Build the item-content matrix from the track table first, then the
# user-rating matrix from the training interactions (same call order as before).
icm_csr, urm_csr = (
    utils.build_icm_csr(tracks),
    utils.build_urm_csr(train),
)



In [4]:
# First split: carve the test set out of the full URM.
train_share = 1 - TEST_SET_HOLDOUT
URM_train, URM_test = train_test_holdout(urm_csr, train_perc=train_share)

# Second split: carve a validation set out of what is left for training.
URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9)

# ALGORITHM

In [5]:
class EnsembleRecommender(object):
    """Recommender combining collaborative and content item similarities.

    ``fit`` builds two cosine-similarity matrices -- one from the
    TF-IDF-weighted URM (collaborative) and one from the TF-IDF-weighted ICM
    (content) -- and trains a ``CFW_D_Similarity_Linalg`` model on the
    collaborative one.  ``recommend`` ranks items by the sum of the user's
    profile projected through both similarity matrices.

    Attributes set by ``fit``:
        URM_csr: the URM passed to ``fit``.
        csr_similarities: cosine similarity computed on the transposed
            TF-IDF URM.
        icm_similarities: cosine similarity computed on the TF-IDF ICM.
        CFW_weithing: the fitted ``CFW_D_Similarity_Linalg`` instance (the
            attribute name is kept as-is because other cells read it).
        min_common_features: stored verbatim; not used by this class.
    """

    def get_URM_train(self):
        """Return the URM that was passed to the last ``fit`` call."""
        return self.URM_csr

    def fit(self, URM_csr, ICM_csr, min_common_features = 10):
        """Compute both similarity matrices and fit the CFW model.

        Args:
            URM_csr: sparse user-rating matrix; ``recommend`` indexes its
                rows by ``user_id``, so rows are presumably users and columns
                items -- TODO confirm against ``utils.build_urm_csr``.
            ICM_csr: sparse item-content matrix; its row-wise similarity is
                multiplied with URM rows in ``recommend``, so rows are
                presumably items -- TODO confirm against
                ``utils.build_icm_csr``.
            min_common_features: stored on the instance only.
        """
        self.min_common_features = min_common_features
        self.URM_csr = URM_csr

        # Collaborative similarity: TF-IDF weight the URM, transpose it and
        # take the cosine similarity between the rows of the transpose.
        urm_tfidf = TfidfTransformer().fit_transform(URM_csr)
        item_user_matrix = sparse.csr_matrix(urm_tfidf.transpose())
        self.csr_similarities = sparse.csr_matrix(
            cosine_similarity(item_user_matrix, dense_output=False))

        # Content similarity: same recipe applied to the rows of the ICM.
        icm_tfidf = TfidfTransformer().fit_transform(ICM_csr)
        self.icm_similarities = sparse.csr_matrix(
            cosine_similarity(icm_tfidf, dense_output=False))

        print("COMPUTING ENSEMBLED CONTENT SIMILARITIES")

        print("CREATING CFW...")
        self.CFW_weithing = CFW_D_Similarity_Linalg(URM_csr, ICM_csr, self.csr_similarities)
        print("FITTING CFW...")
        self.CFW_weithing.fit()

    def recommend(self, user_id, at=10, remove_seen_flag=True, alfa = 0.9):
        """Return the indices of the top ``at`` items for ``user_id``.

        Items are scored by ``profile . icm_similarities +
        profile . csr_similarities`` and returned in descending score order.

        Args:
            user_id: row index of the user in the fitted URM.
            at: maximum number of items to return.
            remove_seen_flag: when true, items already present in the user's
                URM row are dropped before truncating to ``at``.
            alfa: accepted for interface compatibility; the score is an
                unweighted sum, so this value is currently not used.

        Returns:
            1-D numpy array of item indices (at most ``at`` entries).

        Raises:
            AttributeError: if called before ``fit``.
        """
        user_profile = self.URM_csr[user_id]
        item_scores = (user_profile.dot(self.icm_similarities)
                       + user_profile.dot(self.csr_similarities))

        # argsort is ascending, so reverse it to get best-first order.
        ranked_items = np.argsort(item_scores.toarray().ravel())[::-1]

        if remove_seen_flag:
            # ranked_items is a permutation, hence unique by construction.
            unseen_mask = np.isin(ranked_items, user_profile.indices,
                                  assume_unique=True, invert=True)
            ranked_items = ranked_items[unseen_mask]

        return ranked_items[:at]

# Testing algorithm

In [6]:
# Grid search over the SLIM ElasticNet hyper-parameters: every l1_ratio in
# `ratios` is combined with every topK in `ks`, and the MAP@10 of each fitted
# model is recorded.
# NOTE(review): the search scores on URM_test although a URM_validation split
# exists -- consider tuning on the validation split so the test set stays unseen.
ratios = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
ks = [10,25,50,75,100,150,200,250]
MAPS = []  # one [ratio, k, MAP] row per evaluated configuration
for ratio in ratios:
    for k in ks:
        print("TRAINING with top {}".format(k))
        evaluator = SequentialEvaluator(URM_test,cutoff_list=[10])
        recommender = SLIM.SLIMElasticNetRecommender(URM_train)
        recommender.fit(l1_ratio = ratio, positive_only = True, topK = k)
        # Reclaim fitting temporaries before the evaluation pass starts
        # (gc comes from the notebook's import cell).
        gc.collect()
        datas,_ = evaluator.evaluateRecommender(recommender)
        # Results are keyed by cutoff; 10 is the only cutoff requested above.
        MAP = datas[10]['MAP']
        MAPS.append([ratio,k,MAP])
        print("*****RESULT with {} ratio and top {} is MAP = {}".format(ratio,k,MAP))
        gc.collect()

TRAINING with top 10
Processed 5927 ( 28.72% ) in 5.00 minutes. Items per second: 20


KeyboardInterrupt: 

In [6]:
# Instantiate the ensemble and train it on the training URM plus the ICM.
ensemble = EnsembleRecommender()
print("FITTING...")
ensemble.fit(URM_csr=URM_train, ICM_csr=icm_csr)

#evaluate_algorithm(test_set_csr, ensemble, test_set_playlists, alfa = test)
    

FITTING...
COMPUTING ENSEMBLED CONTENT SIMILARITIES
CREATING CFW...
FITTING CFW...
CFW_D_Similarity_Linalg: Generating train data
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 20600 ( 100 % ), 1753.10 column/sec, elapsed time 0.20 min
CFW_D_Similarity_Linalg: Collaborative S density: 2.20E-02, nonzero cells 9383196
CFW_D_Similarity_Linalg: Content S density: 5.38E-03, nonzero cells 2289674
CFW_D_Similarity_Linalg: Content S structure has 195364 out of 2289674 ( 8.53%) nonzero collaborative cells
CFW_D_Similarity_Linalg: Nonzero collaborative cell sum is: 1.87E+04, average is: 9.56E-02, average over all collaborative data is 2.42E-02
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 20600 ( 100 % ), 1413.37 column/sec, elapsed time 0.24 min


In [7]:
# Score the fitted CFW model held by the ensemble on the test split at cutoff 10.
evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])
print("EVALUATING...")
evaluation_output = evaluator_test.evaluateRecommender(ensemble.CFW_weithing)
# The evaluator returns a pair; only the first element (the metrics) is shown.
results_dict, _ = evaluation_output
print(results_dict)

EVALUATING...
SequentialEvaluator: Processed 47522 ( 100.00% ) in 27.30 seconds. Users per second: 1741
{10: {'ROC_AUC': 0.16201952829867047, 'PRECISION': 0.09595048041241283, 'RECALL': 0.09433289055822526, 'RECALL_TEST_LEN': 0.09595048041241283, 'MAP': 0.04586258912064915, 'MRR': 0.12035162661504081, 'NDCG': 0.07611657119746522, 'F1': 0.09513480995824851, 'HIT_RATE': 0.40471781490678, 'ARHR': 0.15159459144402876, 'NOVELTY': 0.007080574316836685, 'DIVERSITY_MEAN_INTER_LIST': 0.9973652910848508, 'DIVERSITY_HERFINDAHL': 0.9997344303641328, 'COVERAGE_ITEM': 0.7406833050642113, 'COVERAGE_USER': 0.9420370296951195, 'DIVERSITY_GINI': 0.33948332633211686, 'SHANNON_ENTROPY': 12.691519799957518}}


# TESTING SINGLE ITERATION