In [1]:
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MaxAbsScaler
import gc
import utils
from GraphBased.P3alphaRecommender import P3alphaRecommender

TEST_SET_THRESHOLD = 10
TEST_SET_HOLDOUT = 0.15
BEST_ALPHA = 0.55
BEST_BETA = 0.86
BEST_GAMMA = 0.35
BEST_BATCH = 10
BEST_K = 50

In [2]:
tracks = pd.read_csv('../input/tracks.csv')
train = pd.read_csv('../input/train.csv')
target = pd.read_csv('../input/target_playlists.csv')

In [3]:
icm_csr = utils.build_icm_csr(tracks)
urm_csr = utils.build_urm_csr(train)



In [4]:
from utils import split

URM_train, URM_test = split(urm_csr,TEST_SET_HOLDOUT = TEST_SET_HOLDOUT,TEST_SET_THRESHOLD = TEST_SET_THRESHOLD)
URM_train, URM_validation = split(URM_train,TEST_SET_HOLDOUT = TEST_SET_HOLDOUT,TEST_SET_THRESHOLD = TEST_SET_THRESHOLD)

In [5]:
URM_validation

<50446x20635 sparse matrix of type '<class 'numpy.float32'>'
	with 55490 stored elements in Compressed Sparse Row format>

In [13]:
class EnsembleRecommender(object):
    
    
    def fit(self,URM_csr,slim_recommender,W_P3, tf_idf_urm, tf_idf_icm, alpha, beta, gamma):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.slim_recommender = slim_recommender
        self.URM_csr = URM_csr
        IRM = sparse.csr_matrix(tf_idf_urm.transpose())
        print("COMPUTING ENSEMBLE SIMILARITIES")
        urm_similarities = sparse.csr_matrix(cosine_similarity(IRM, dense_output=False))
        scaler = MaxAbsScaler(copy=False)
        scaler.fit(urm_similarities)
        self.urm_similarities = scaler.transform(urm_similarities)
        icm_similarities = sparse.csr_matrix(cosine_similarity(tf_idf_icm, dense_output=False))
        scaler = MaxAbsScaler(copy=False)
        scaler.fit(icm_similarities)
        self.icm_similarities = scaler.transform(icm_similarities)
        scaler = MaxAbsScaler(copy=False)
        scaler.fit(W_P3)
        self.W_P3 = scaler.transform(W_P3)
        item_similarities = self.alpha*self.urm_similarities + (1-self.alpha)*self.icm_similarities
        self.item_similarities = self.gamma*item_similarities + (1-self.gamma) * self.W_P3
        
    def recommend(self, user_id, at=10, remove_seen_flag=True):
        user = self.URM_csr.getrow(user_id)
        itemPopularity = user.dot(self.item_similarities)
        slimPopularity = self.slim_recommender.compute_item_score(user_id)
        item_popularity = itemPopularity*self.beta + slimPopularity*(1-self.beta)
        popularItems = np.argsort(np.array(item_popularity)[0])
        popularItems = np.flip(popularItems, axis = 0)

        if remove_seen_flag:
            unseen_items_mask = np.in1d(popularItems, self.URM_csr[user_id].indices,
                                        assume_unique=True, invert = True)

            unseen_items = popularItems[unseen_items_mask]
            
            recommended_items = unseen_items[0:at]

        else:
            recommended_items = popularItems[0:at]
            
        #recommended_items = " ".join(str(i) for i in recommended_items)
        return recommended_items
    
    

In [44]:
slim_recommender = SLIM_BPR_Cython(urm_csr,recompile_cython=False,positive_threshold=1) 

SLIM_BPR_Cython: Estimated memory required for similarity matrix of 20635 items is 1703.21 MB


In [45]:
slim_recommender.fit(epochs=100,sgd_mode='adam',batch_size=BEST_BATCH,topK=BEST_K)

Processed 1211791 ( 100.00% ) in 5.56 seconds. BPR loss is 3.20E-02. Sample per second: 217946
SLIM_BPR_Recommender: Epoch 1 of 100. Elapsed time 0.14 min
Processed 1211791 ( 100.00% ) in 5.35 seconds. BPR loss is 1.15E-01. Sample per second: 226378
SLIM_BPR_Recommender: Epoch 2 of 100. Elapsed time 0.22 min
Processed 1211791 ( 100.00% ) in 5.02 seconds. BPR loss is 2.12E-01. Sample per second: 241542
SLIM_BPR_Recommender: Epoch 3 of 100. Elapsed time 0.29 min
Processed 1211791 ( 100.00% ) in 4.83 seconds. BPR loss is 3.21E-01. Sample per second: 250974
SLIM_BPR_Recommender: Epoch 4 of 100. Elapsed time 0.37 min
Processed 1211791 ( 100.00% ) in 5.69 seconds. BPR loss is 4.40E-01. Sample per second: 213013
SLIM_BPR_Recommender: Epoch 5 of 100. Elapsed time 0.45 min
Processed 1211791 ( 100.00% ) in 5.44 seconds. BPR loss is 5.72E-01. Sample per second: 222728
SLIM_BPR_Recommender: Epoch 6 of 100. Elapsed time 0.53 min
Processed 1211791 ( 100.00% ) in 5.12 seconds. BPR loss is 7.13E-01. S

In [8]:
nnz_per_row = URM_validation.getnnz(axis=1)
playlists = np.where(nnz_per_row >0)[0]

In [46]:
target = target.get_values()[:,0]

In [47]:
BEST_groups = 19
BEST_alphas:_ = 0.64
betas = 0.75
gammas = 0.81
MAPS_test4 = []
transformer = TfidfTransformer()
transformer.fit(icm_csr)
tf_idf_icm = transformer.transform(icm_csr)
transformer = TfidfTransformer()
transformer.fit(URM_train)
tf_idf_urm = transformer.transform(URM_train)
P3alpha = P3alphaRecommender(tf_idf_urm)
P3alpha.fit(alpha=0.8)
W_P3 = P3alpha.W_sparse
ensemble_recommender_fixed = EnsembleRecommender()
ensemble_recommender_fixed.fit(URM_train,slim_recommender,W_P3,tf_idf_urm,tf_idf_icm,alpha = BEST_ALPHA,beta = BEST_BETA,gamma = BEST_GAMMA)
gc.collect()
for user_group in groups:
    URM_validation_upper, URM_validation_lower = utils.split_for_songs(URM_train,URM_validation,user_group)
    nnz_per_row = URM_validation_upper.getnnz(axis=1)
    playlists = np.where(nnz_per_row >0)[0]
    nnz_per_row = URM_validation_lower.getnnz(axis=1)
    playlists2 = np.where(nnz_per_row >0)[0]
    map_fixed = utils.evaluate_algorithm(URM_validation_upper,ensemble_recommender_fixed,at=10,target_playlists=playlists)
    gc.collect()
    for alpha in alphas:
        for beta in betas:
            for gamma in gammas:
                
                if(len(playlists2 != 0)):
                    ensemble_recommender = EnsembleRecommender()
                    print("testing alpha ={} , beta ={}, gamma ={}".format(alpha,beta,gamma))
                    ensemble_recommender.fit(URM_train,slim_recommender,W_P3,tf_idf_urm,tf_idf_icm,alpha = alpha,beta = beta,gamma = gamma)
                    print("fitted, now evaluating")
                    map2 = utils.evaluate_algorithm(URM_validation_lower,ensemble_recommender,at=10,target_playlists=playlists2)
                    map = (map_fixed + map2)/2
                else:
                    map = map_fixed
                MAPS_test4.append([user_group,alpha,beta,gamma,map])
                print("MAP =",map)
                gc.collect()



COMPUTING ENSEMBLE SIMILARITIES
COMPUTING ENSEMBLE SIMILARITIES


0

In [41]:
file_name = "Maps_ensemble_user_aware6"
np.save(file_name,np.array(MAPS_test4))

In [48]:
BEST_groups = 19
BEST_alpha2 = 0.64
BEST_beta2 = 0.75
BEST_gamma2 = 0.81

transformer = TfidfTransformer()
transformer.fit(icm_csr)
tf_idf_icm = transformer.transform(icm_csr)
transformer = TfidfTransformer()
transformer.fit(urm_csr)
tf_idf_urm = transformer.transform(urm_csr)
P3alpha = P3alphaRecommender(tf_idf_urm)
P3alpha.fit(alpha=0.8)
W_P3 = P3alpha.W_sparse
ensemble_recommender_upper = EnsembleRecommender()
ensemble_recommender_upper.fit(urm_csr,slim_recommender,W_P3,tf_idf_urm,tf_idf_icm,alpha = BEST_ALPHA,beta = BEST_BETA,gamma = BEST_GAMMA)
gc.collect()

map_fixed = utils.evaluate_algorithm(URM_validation_upper,ensemble_recommender_fixed,at=10,target_playlists=playlists)
gc.collect()
ensemble_recommender_lower = EnsembleRecommender()
ensemble_recommender_lower.fit(urm_csr,slim_recommender,W_P3,tf_idf_urm,tf_idf_icm,alpha = BEST_alpha2,beta = BEST_beta2,gamma = BEST_gamma2)
print("fitted")




COMPUTING ENSEMBLE SIMILARITIES
COMPUTING ENSEMBLE SIMILARITIES
fitted


In [49]:
upper = []
lower = []
for item in target:
    nnz = len(urm_csr.getrow(item).data)
    if nnz >= 19:
        upper.append(item)
    else:
        lower.append(item)

In [51]:
lower

[25,
 50,
 60,
 64,
 106,
 169,
 210,
 231,
 256,
 258,
 272,
 275,
 326,
 365,
 407,
 470,
 529,
 537,
 545,
 567,
 592,
 611,
 631,
 642,
 646,
 683,
 704,
 709,
 719,
 724,
 731,
 737,
 774,
 924,
 949,
 957,
 988,
 994,
 1046,
 1057,
 1103,
 1108,
 1120,
 1163,
 1254,
 1285,
 1319,
 1346,
 1363,
 1379,
 1383,
 1398,
 1415,
 1417,
 1454,
 1456,
 1463,
 1524,
 1534,
 1542,
 1554,
 1560,
 1588,
 1590,
 1601,
 1639,
 1661,
 1679,
 1682,
 1692,
 1712,
 1713,
 1731,
 1742,
 1747,
 1814,
 1881,
 1916,
 1927,
 1967,
 1978,
 2015,
 2125,
 2142,
 2185,
 2261,
 2306,
 2312,
 2358,
 2363,
 2364,
 2433,
 2444,
 2473,
 2484,
 2515,
 2517,
 2524,
 2589,
 2598,
 2605,
 2622,
 2624,
 2665,
 2666,
 2680,
 2713,
 2727,
 2734,
 2742,
 2769,
 2777,
 2833,
 2909,
 2922,
 2939,
 2957,
 2988,
 3043,
 3045,
 3049,
 3051,
 3103,
 3146,
 3192,
 3200,
 3244,
 3248,
 3278,
 3287,
 3320,
 3329,
 3334,
 3338,
 3353,
 3386,
 3409,
 3415,
 3477,
 3482,
 3488,
 3511,
 3523,
 3531,
 3578,
 3634,
 3650,
 3678,
 3711,

In [52]:
result_upper = []
for items in upper:
    partial_recomendations = ensemble_recommender_upper.recommend(items,at=10)
    result_upper.append(partial_recomendations)

In [53]:
result_upper = np.array(result_upper)

In [54]:
result_upper = result_upper.reshape(-1,10)

In [55]:
i = 0
results = []
for item in result_upper:
    recommended_items = " ".join(str(i) for i in item)
    temp = [upper[i],recommended_items]
    results.append(temp)
    i += 1

In [56]:
result_lower = []
for items in lower:
    partial_recomendations = ensemble_recommender_lower.recommend(items,at=10)
    result_lower.append(partial_recomendations)

In [57]:
result_lower = np.array(result_lower)

In [58]:
result_lower = result_lower.reshape(-1,10)

In [59]:
i = 0
for item in result_lower:
    recommended_items = " ".join(str(i) for i in item)
    temp = [lower[i],recommended_items]
    results.append(temp)
    i += 1

In [60]:
results = sorted(results, key=itemgetter(0))

In [61]:
results

[[3, '344 10293 13960 3321 18344 6579 12111 752 9740 10654'],
 [6, '8749 1111 20242 20443 8275 4910 11943 797 12311 12510'],
 [7, '11257 8749 11233 14765 5924 17154 20443 13542 12311 10300'],
 [17, '9246 7416 9580 14572 9882 2918 16289 9655 16149 9471'],
 [18, '13941 18578 15716 8890 3130 11951 14879 18763 1302 8885'],
 [19, '2306 16973 19411 3835 10679 4152 4923 18906 15807 4466'],
 [20, '14591 13139 13237 8956 15578 13980 13101 10264 17239 7619'],
 [25, '4202 699 7545 1111 16866 800 11943 7077 1900 5620'],
 [29, '12075 8001 15341 2159 18361 15886 3903 56 1944 2508'],
 [34, '6285 6951 14751 1357 20306 3410 15386 1850 1828 7517'],
 [36, '13139 11107 18381 17239 2674 11206 2912 9705 6253 8956'],
 [40, '18418 8796 3880 1723 8960 20268 7550 13678 20468 1486'],
 [48, '648 7506 8767 20172 16502 10848 8491 2997 2437 2780'],
 [50, '20287 6892 12963 11896 13368 12425 17034 8229 4057 16471'],
 [52, '19173 16293 8838 9441 14827 8456 17247 1357 6577 18963'],
 [59, '628 123 13670 3624 3860 16521 1

In [15]:
rec = pd.DataFrame(results)
rec.to_csv("submission_ensemble_slim_graph.csv", index = False, header = ["playlist_id", "track_ids"])

In [42]:
from operator import itemgetter
MAPS_sorted4 = sorted(MAPS_test4, key = itemgetter(4))
MAPS_sorted4 = np.flip(MAPS_sorted4, axis = 0)
print(MAPS_sorted4[:10])

[[ 19.           0.64         0.75         0.82         0.09323622]
 [ 19.           0.64         0.75         0.78         0.09319065]
 [ 19.           0.63         0.75         0.8          0.09318497]
 [ 19.           0.63         0.73         0.82         0.09318443]
 [ 19.           0.64         0.75         0.8          0.09317384]
 [ 19.           0.64         0.73         0.82         0.09317175]
 [ 19.           0.63         0.75         0.82         0.09317154]
 [ 19.           0.63         0.72         0.8          0.09316886]
 [ 19.           0.64         0.72         0.8          0.09315937]
 [ 19.           0.62         0.75         0.82         0.09315685]]


In [232]:
URM_upper,URM_lower = split_for_songs(URM_train,URM_validation,songs_treshold=7)

In [237]:
nnz_per_row = URM_lower.sum(axis=1)
selected = np.where(nnz_per_row > 0)[0]

In [233]:
URM_upper.indices

array([ 1461,  2494,  3767, ..., 15259, 18739, 19363], dtype=int32)

array([    2,    40,    94, ..., 50319, 50353, 50444])

<1x20635 sparse matrix of type '<class 'numpy.float32'>'
	with 2 stored elements in Compressed Sparse Row format>

In [55]:
URM_validation_upper

<49980x20635 sparse matrix of type '<class 'numpy.float32'>'
	with 51900 stored elements in Compressed Sparse Row format>

In [8]:
target = target.get_values()[:,0]

In [9]:
n = 500
  
# using list comprehension 
divided_target = [target[i * n:(i + 1) * n] for i in range((len(target) + n - 1) // n )]  