In [1]:
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
import gc
import utils
from GraphBased.P3alphaRecommender import P3alphaRecommender

TEST_SET_THRESHOLD = 10
TEST_SET_HOLDOUT = 0.2
BEST_ALFA = 0.8
BEST_BETA = 0.9
BEST_BATCH = 10
BEST_K = 50

In [2]:
tracks = pd.read_csv('../input/tracks.csv')
train = pd.read_csv('../input/train.csv')
target = pd.read_csv('../input/target_playlists.csv')

In [3]:
icm_csr = utils.build_icm_csr(tracks)
urm_csr = utils.build_urm_csr(train)



In [4]:
from Notebooks_utils.data_splitter import train_test_holdout

URM_train, URM_test = train_test_holdout(urm_csr, train_perc = 0.8)
URM_train, URM_validation = train_test_holdout(URM_train, train_perc = 0.9)

In [5]:
URM_test

<50446x20635 sparse matrix of type '<class 'numpy.float32'>'
	with 242740 stored elements in Compressed Sparse Row format>

In [6]:
nnz_per_row = URM_test.getnnz(axis=1)
result = np.where(nnz_per_row >0)[0]

In [7]:
len(result)

47521

In [8]:
class EnsembleRecommender(object):
    
    def get_URM_train(self):
        return self.URM_csr.copy()
    
    def fit(self,slim_recommender, URM_csr, ICM_csr, alfa, beta, gamma):
        self.beta = beta
        self.gamma = gamma
        self.slim_recommender = slim_recommender
        P3alpha = P3alphaRecommender(URM_csr)
        P3alpha.fit()
        transformer = TfidfTransformer()
        transformer.fit(URM_csr)
        tf_idf_csr = transformer.transform(URM_csr)

        IRM = sparse.csr_matrix(tf_idf_csr.transpose())
        
        csr_similarities = sparse.csr_matrix(cosine_similarity(IRM, dense_output=False))
        

        transformer.fit(ICM_csr)
        tf_idf_icm = transformer.transform(ICM_csr)
        icm_similarities = sparse.csr_matrix(cosine_similarity(tf_idf_icm, dense_output=False))
        
        print("COMPUTING ENSEMBLE SIMILARITIES")
        self.item_similarities = alfa*csr_similarities + (1-alfa)*icm_similarities
        self.item_similarities = gamma*self.item_similarities + (1-gamma) * P3alpha.W_sparse
        self.URM_csr = URM_csr
        
    
    def recommend(self, user_id, cutoff=10, remove_seen_flag=True):
        
        
        user = self.URM_csr.getrow(user_id)
        itemPopularity = user.dot(self.item_similarities)
        slimPopularity = self.slim_recommender.compute_item_score(user_id)
        item_popularity = itemPopularity*self.beta + slimPopularity*(1-self.beta)
        popularItems = np.argsort(np.array(item_popularity)[0])
        popularItems = np.flip(popularItems, axis = 0)

        if remove_seen_flag:
            unseen_items_mask = np.in1d(popularItems, self.URM_csr[user_id].indices,
                                        assume_unique=True, invert = True)

            unseen_items = popularItems[unseen_items_mask]
            
            recommended_items = unseen_items[0:cutoff]

        else:
            recommended_items = popularItems[0:cutoff]
            
        #recommended_items = " ".join(str(i) for i in recommended_items)
        return recommended_items
    
    

In [9]:
recommender = SLIM_BPR_Cython(URM_train,recompile_cython=False,positive_threshold=1) 

SLIM_BPR_Cython: Estimated memory required for similarity matrix of 20635 items is 1703.21 MB


In [10]:
recommender.fit(epochs=150, batch_size=5,sgd_mode='adam',learning_rate=1e-4,topK=80)

Processed 871694 ( 100.00% ) in 3.22 seconds. BPR loss is 5.97E-03. Sample per second: 270355
SLIM_BPR_Recommender: Epoch 1 of 150. Elapsed time 0.11 min
Processed 871694 ( 100.00% ) in 2.78 seconds. BPR loss is 2.26E-02. Sample per second: 313852
SLIM_BPR_Recommender: Epoch 2 of 150. Elapsed time 0.15 min
Processed 871694 ( 100.00% ) in 3.33 seconds. BPR loss is 4.22E-02. Sample per second: 261769
SLIM_BPR_Recommender: Epoch 3 of 150. Elapsed time 0.19 min
Processed 871694 ( 100.00% ) in 2.88 seconds. BPR loss is 6.41E-02. Sample per second: 302890
SLIM_BPR_Recommender: Epoch 4 of 150. Elapsed time 0.23 min
Processed 871694 ( 100.00% ) in 3.43 seconds. BPR loss is 8.79E-02. Sample per second: 254389
SLIM_BPR_Recommender: Epoch 5 of 150. Elapsed time 0.28 min
Processed 871694 ( 100.00% ) in 3.01 seconds. BPR loss is 1.15E-01. Sample per second: 289318
SLIM_BPR_Recommender: Epoch 6 of 150. Elapsed time 0.32 min
Processed 871694 ( 100.00% ) in 2.56 seconds. BPR loss is 1.42E-01. Sample p

In [15]:
from utils import evaluate_algorithm

In [21]:
gammas = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
MAPS = []
for gamma in gammas:
    print("testing gamma = ", gamma)
    ensemble_recommender = EnsembleRecommender()
    ensemble_recommender.fit(recommender,URM_train,icm_csr,alfa = 0.8,beta = 0.9,gamma = gamma)
    gc.collect()
    evaluator = SequentialEvaluator(URM_test,cutoff_list=[10])
    datas,_ = evaluator.evaluateRecommender(ensemble_recommender)
    MAP = datas[10]['MAP']
    MAPS.append([gamma,MAP])
    gc.collect()

testing gamma =  0
COMPUTING ENSEMBLE SIMILARITIES


TypeError: recommend() got an unexpected keyword argument 'remove_top_pop_flag'

In [19]:
from operator import itemgetter
sorted(MAPS, key = itemgetter(1))

[[0, 0.082204689442336928],
 [0.1, 0.082204689442336928],
 [0.2, 0.082204689442336928],
 [0.3, 0.082204689442336928],
 [0.4, 0.082204689442336928],
 [0.5, 0.082204689442336928],
 [0.6, 0.082204689442336928],
 [0.7, 0.082204689442336928],
 [0.8, 0.082204689442336928],
 [0.9, 0.082204689442336928],
 [1, 0.082204689442336928]]

In [None]:
target = target.get_values()[:,0]

In [None]:
n = 500
  
# using list comprehension 
divided_target = [target[i * n:(i + 1) * n] for i in range((len(target) + n - 1) // n )]  

In [None]:
result = []
for items in target:
    partial_recomendations = ensemble_recommender.recommend(items,cutoff=10)
    result.append(partial_recomendations)

In [None]:
result = np.array(result)

In [None]:
result = result.reshape(-1,10)

In [None]:
i = 0
results = []
for item in result:
    recommended_items = " ".join(str(i) for i in item)
    temp = [target[i],recommended_items]
    results.append(temp)
    i += 1

In [None]:
rec = pd.DataFrame(results)
rec.to_csv("submission_ensemble.csv", index = False, header = ["playlist_id", "track_ids"])