In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 23/10/17

@author: Maurizio Ferrari Dacrema
"""

import numpy as np
import time, sys
import scipy.sparse as sps



def check_matrix(X, format='csc', dtype=np.float32):
    """
    Return X as a sparse matrix of the requested format and dtype.

    :param X:       scipy sparse matrix to convert
    :param format:  one of 'csc', 'csr', 'coo', 'dok', 'bsr', 'dia', 'lil';
                    any other value leaves the storage format untouched
    :param dtype:   dtype of the returned matrix
    :return:        X converted to `format` (only if it is not already an
                    instance of that format's class) and cast to `dtype`
    """
    # format name -> (class to test against, name of the conversion method)
    known_formats = {
        'csc': (sps.csc_matrix, 'tocsc'),
        'csr': (sps.csr_matrix, 'tocsr'),
        'coo': (sps.coo_matrix, 'tocoo'),
        'dok': (sps.dok_matrix, 'todok'),
        'bsr': (sps.bsr_matrix, 'tobsr'),
        'dia': (sps.dia_matrix, 'todia'),
        'lil': (sps.lil_matrix, 'tolil'),
    }

    if format in known_formats:
        target_class, converter_name = known_formats[format]
        if not isinstance(X, target_class):
            return getattr(X, converter_name)().astype(dtype)

    # Already in the requested format, or format not recognised: only cast
    return X.astype(dtype)



class Compute_Similarity_Python:
    """
    Computes a similarity matrix between the columns of a sparse data matrix,
    processing the columns in dense blocks and optionally keeping only the
    TopK most similar columns for each column.
    """


    def __init__(self, dataMatrix, topK=100, shrink = 0, normalize = True,
                 asymmetric_alpha = 0.5, tversky_alpha = 1.0, tversky_beta = 1.0,
                 similarity = "cosine", row_weights = None):
        """
        Computes the cosine similarity on the columns of dataMatrix
        If it is computed on URM=|users|x|items|, pass the URM as is.
        If it is computed on ICM=|items|x|features|, pass the ICM transposed.
        :param dataMatrix:          sparse matrix, it is copied and the copy is modified in place
        :param topK:                number of most similar columns kept for each column,
                                    0 means a dense matrix with all the similarities is returned
        :param shrink:              constant added to the denominator
        :param normalize:           If True divide the dot product by the product of the norms
        :param row_weights:         Multiply the values in each row by a specified value. Array
        :param asymmetric_alpha     Coefficient alpha for the asymmetric cosine
        :param tversky_alpha:       Coefficient alpha for the Tversky similarity
        :param tversky_beta:        Coefficient beta for the Tversky similarity
        :param similarity:  "cosine"        computes Cosine similarity
                            "adjusted"      computes Adjusted Cosine, removing the average of the users
                            "asymmetric"    computes Asymmetric Cosine
                            "pearson"       computes Pearson Correlation, removing the average of the items
                            "jaccard"       computes Jaccard similarity for binary interactions using Tanimoto
                            "dice"          computes Dice similarity for binary interactions
                            "tversky"       computes Tversky similarity for binary interactions
                            "tanimoto"      computes Tanimoto coefficient for binary interactions
        :raises ValueError:         if similarity is not one of the values above or if
                                    row_weights does not have one entry per row of dataMatrix

        Asymmetric Cosine as described in:
        Aiolli, F. (2013, October). Efficient top-n recommendation for very large scale binary rated datasets. In Proceedings of the 7th ACM conference on Recommender systems (pp. 273-280). ACM.

        """

        super(Compute_Similarity_Python, self).__init__()

        self.TopK = topK
        self.shrink = shrink
        self.normalize = normalize
        self.n_columns = dataMatrix.shape[1]
        self.n_rows = dataMatrix.shape[0]
        self.asymmetric_alpha = asymmetric_alpha
        self.tversky_alpha = tversky_alpha
        self.tversky_beta = tversky_beta

        # Work on a private copy, the pre-processing steps modify the data in place
        self.dataMatrix = dataMatrix.copy()

        self.adjusted_cosine = False
        self.asymmetric_cosine = False
        self.pearson_correlation = False
        self.tanimoto_coefficient = False
        self.dice_coefficient = False
        self.tversky_coefficient = False

        if similarity == "adjusted":
            self.adjusted_cosine = True
        elif similarity == "asymmetric":
            self.asymmetric_cosine = True
        elif similarity == "pearson":
            self.pearson_correlation = True
        elif similarity == "jaccard" or similarity == "tanimoto":
            self.tanimoto_coefficient = True
            # Tanimoto has a specific kind of normalization
            self.normalize = False

        elif similarity == "dice":
            self.dice_coefficient = True
            self.normalize = False

        elif similarity == "tversky":
            self.tversky_coefficient = True
            self.normalize = False

        elif similarity == "cosine":
            pass
        else:
            raise ValueError("Cosine_Similarity: value for parameter 'similarity' not recognized."
                             " Allowed values are: 'cosine', 'pearson', 'adjusted', 'asymmetric', 'jaccard', 'tanimoto',"
                             " 'dice', 'tversky'."
                             " Passed value was '{}'".format(similarity))

        if self.TopK == 0:
            self.W_dense = np.zeros((self.n_columns, self.n_columns))

        self.use_row_weights = False

        if row_weights is not None:

            if dataMatrix.shape[0] != len(row_weights):
                raise ValueError("Cosine_Similarity: provided row_weights and dataMatrix have different number of rows."
                                 " row_weights has {} entries, dataMatrix has {} rows.".format(len(row_weights), dataMatrix.shape[0]))

            self.use_row_weights = True
            self.row_weights = row_weights.copy()
            self.row_weights_diag = sps.diags(self.row_weights)

            # NOTE: the weighted matrix is built here, from the data as passed in,
            # i.e. before any mean removal / binarization done by compute_similarity
            self.dataMatrix_weighted = self.dataMatrix.T.dot(self.row_weights_diag).T


    def _remove_mean_along_major_axis(self, sum_axis, n_major):
        """
        Subtract from every stored value the average of the stored values of
        its row (CSR) or column (CSC). self.dataMatrix must already be in the
        compressed format whose indptr delimits the slices to be centered.
        :param sum_axis:    axis passed to sum() to obtain one total per slice
        :param n_major:     number of slices (rows for CSR, columns for CSC)
        """
        indptr = self.dataMatrix.indptr
        interactions = np.diff(indptr)
        non_empty = interactions > 0

        totals = np.asarray(self.dataMatrix.sum(axis=sum_axis)).ravel()

        # The average is computed over the stored entries only, empty slices keep 0
        averages = np.zeros_like(totals)
        averages[non_empty] = totals[non_empty] / interactions[non_empty]

        # Split in blocks to avoid duplicating the whole data structure
        blockSize = 1000

        for start in range(0, n_major, blockSize):
            end = min(n_major, start + blockSize)

            self.dataMatrix.data[indptr[start]:indptr[end]] -= \
                np.repeat(averages[start:end], interactions[start:end])


    def applyAdjustedCosine(self):
        """
        Remove from every data point the average for the corresponding row
        :return:
        """

        self.dataMatrix = check_matrix(self.dataMatrix, 'csr')
        self._remove_mean_along_major_axis(sum_axis=1, n_major=self.n_rows)


    def applyPearsonCorrelation(self):
        """
        Remove from every data point the average for the corresponding column
        :return:
        """

        self.dataMatrix = check_matrix(self.dataMatrix, 'csc')
        self._remove_mean_along_major_axis(sum_axis=0, n_major=self.n_columns)


    def useOnlyBooleanInteractions(self):
        """
        Replace every stored value of the data matrix with 1
        :return:
        """

        # In-place assignment, no temporary array of ones is needed
        self.dataMatrix.data[:] = 1.0


    def compute_similarity(self, start_col=None, end_col=None, block_size = 100):
        """
        Compute the similarity for the given dataset
        :param self:
        :param start_col: column to begin with
        :param end_col: column to stop before, end_col is excluded
        :param block_size: number of columns densified and processed at once
        :return: dense array of shape (n_columns, n_columns) if TopK is 0,
                 otherwise a sparse CSR matrix with at most TopK values per column

        NOTE: the mean removal / binarization is applied in place on
        self.dataMatrix every time this method is called.
        """

        values = []
        rows = []
        cols = []

        start_time = time.time()
        start_time_print_batch = start_time
        processedItems = 0

        binary_similarity = (self.tanimoto_coefficient or self.dice_coefficient
                             or self.tversky_coefficient)

        if self.adjusted_cosine:
            self.applyAdjustedCosine()

        elif self.pearson_correlation:
            self.applyPearsonCorrelation()

        elif binary_similarity:
            self.useOnlyBooleanInteractions()


        # We explore the matrix column-wise
        self.dataMatrix = check_matrix(self.dataMatrix, 'csc')


        # Compute sum of squared values to be used in normalization
        sumOfSquared = np.array(self.dataMatrix.power(2).sum(axis=0)).ravel()

        # Tanimoto does not require the square root to be applied
        if not binary_similarity:
            sumOfSquared = np.sqrt(sumOfSquared)

        if self.asymmetric_cosine:
            # sumOfSquared is a numpy array, it has no .power() method
            sumOfSquared_to_1_minus_alpha = np.power(sumOfSquared, 2 * (1 - self.asymmetric_alpha))
            sumOfSquared_to_alpha = np.power(sumOfSquared, 2 * self.asymmetric_alpha)


        start_col_local = 0
        end_col_local = self.n_columns

        if start_col is not None and start_col>0 and start_col<self.n_columns:
            start_col_local = start_col

        if end_col is not None and end_col>start_col_local and end_col<self.n_columns:
            end_col_local = end_col

        # argpartition fails if asked for more elements than available
        top_k = min(self.TopK, self.n_columns)

        start_col_block = start_col_local

        this_block_size = 0

        # Compute all similarities for each item using vectorization
        while start_col_block < end_col_local:

            # Add previous block size
            processedItems += this_block_size

            end_col_block = min(start_col_block + block_size, end_col_local)
            this_block_size = end_col_block-start_col_block


            if time.time() - start_time_print_batch >= 30 or end_col_block==end_col_local:
                # Guard against a zero elapsed time on coarse timers
                elapsed = max(time.time() - start_time, 1e-9)
                columnPerSec = processedItems / elapsed

                print("Similarity column {} ( {:2.0f} % ), {:.2f} column/sec, elapsed time {:.2f} min".format(
                    processedItems, processedItems / (end_col_local - start_col_local) * 100, columnPerSec, elapsed / 60))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_print_batch = time.time()


            # All data points for the items of this block, kept 2-D
            # (n_rows x this_block_size) whatever the number of rows or columns is
            item_data = self.dataMatrix[:, start_col_block:end_col_block].toarray()

            if self.use_row_weights:
                this_block_weights = np.asarray(self.dataMatrix_weighted.T.dot(item_data))

            else:
                # Compute item similarities
                this_block_weights = np.asarray(self.dataMatrix.T.dot(item_data))


            for col_index_in_block in range(this_block_size):

                this_column_weights = this_block_weights[:, col_index_in_block]

                columnIndex = col_index_in_block + start_col_block

                # A column is never similar to itself
                this_column_weights[columnIndex] = 0.0

                # Apply normalization and shrinkage, ensure denominator != 0
                if self.normalize:

                    if self.asymmetric_cosine:
                        denominator = sumOfSquared_to_alpha[columnIndex] * sumOfSquared_to_1_minus_alpha + self.shrink + 1e-6
                    else:
                        denominator = sumOfSquared[columnIndex] * sumOfSquared + self.shrink + 1e-6

                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)


                # Apply the specific denominator for Tanimoto
                elif self.tanimoto_coefficient:
                    denominator = sumOfSquared[columnIndex] + sumOfSquared - this_column_weights + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                elif self.dice_coefficient:
                    denominator = sumOfSquared[columnIndex] + sumOfSquared + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                elif self.tversky_coefficient:
                    denominator = this_column_weights + \
                                  (sumOfSquared[columnIndex] - this_column_weights)*self.tversky_alpha + \
                                  (sumOfSquared - this_column_weights)*self.tversky_beta + self.shrink + 1e-6
                    this_column_weights = np.multiply(this_column_weights, 1 / denominator)

                # If no normalization or tanimoto is selected, apply only shrink
                elif self.shrink != 0:
                    this_column_weights = this_column_weights/self.shrink


                if self.TopK == 0:
                    self.W_dense[:, columnIndex] = this_column_weights

                else:
                    # Sort indices and select TopK
                    # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
                    # - Partition the data to extract the set of relevant items
                    # - Sort only the relevant items
                    # - Get the original item index
                    relevant_items_partition = (-this_column_weights).argpartition(top_k-1)[0:top_k]
                    relevant_items_partition_sorting = np.argsort(-this_column_weights[relevant_items_partition])
                    top_k_idx = relevant_items_partition[relevant_items_partition_sorting]

                    # Incrementally build sparse matrix, do not add zeros
                    notZerosMask = this_column_weights[top_k_idx] != 0.0
                    numNotZeros = int(np.sum(notZerosMask))

                    values.extend(this_column_weights[top_k_idx][notZerosMask])
                    rows.extend(top_k_idx[notZerosMask])
                    cols.extend([columnIndex] * numNotZeros)


            start_col_block += block_size

        # End while on columns


        if self.TopK == 0:
            return self.W_dense

        W_sparse = sps.csr_matrix((values, (rows, cols)),
                                  shape=(self.n_columns, self.n_columns),
                                  dtype=np.float32)

        return W_sparse

In [None]:
import scipy.sparse as sparse
import numpy as np
class ItemCBFKNNRecommender(object):
    """
    Item-based KNN recommender: builds an item-item similarity matrix from
    the given feature matrix and scores items through the user profile.
    """

    def __init__(self, URM, ICM):
        # URM: user x item interactions, ICM: item x feature matrix
        self.URM, self.ICM = URM, ICM


    def fit(self, topK=50, shrink=100, normalize = True, similarity = "cosine"):
        """
        Compute, store in self.W_sparse and return the similarity between
        the rows of the ICM (it is passed transposed to the similarity object).
        """
        engine = Compute_Similarity_Python(self.ICM.T,
                                           shrink=shrink,
                                           topK=topK,
                                           normalize=normalize,
                                           similarity=similarity)

        self.W_sparse = engine.compute_similarity()
        return self.W_sparse


    def recommend(self, user_id, at=None, exclude_seen=True):
        """
        Return the item indices ranked by decreasing score, truncated to the
        first `at` (all of them when `at` is None).
        """
        # scores are the dot product between the user profile and the similarity
        profile = self.URM[user_id]
        scores = profile.dot(self.W_sparse).toarray().ravel()

        if exclude_seen:
            scores = self.filter_seen(user_id, scores)

        # ascending argsort, reversed to get the best items first
        best_first = np.argsort(scores)[::-1]
        return best_first[:at]


    def filter_seen(self, user_id, scores):
        """
        Set to -inf, in place, the score of the items stored in the URM row of
        the user and return the scores array. Relies on the URM being CSR.
        """
        first = self.URM.indptr[user_id]
        last = self.URM.indptr[user_id+1]
        seen_items = self.URM.indices[first:last]

        scores[seen_items] = -np.inf
        return scores


In [None]:
import numpy as np
import pandas as pd
from scipy import sparse as sps
from sklearn.preprocessing import MultiLabelBinarizer, normalize
from sklearn import feature_extraction


class Builder(object):
    """
    Loads the playlist/track CSV files and builds the sparse user-rating
    matrices (URM) and the item-content matrix (ICM) used by the recommenders.
    """

    def __init__(self, local):
        """
        :param local: True to load the hold-out split (train and test files),
                      anything else to load the full training file only; in
                      that case self.test is not defined.
        """
        if local == True:
            self.train= pd.read_csv('../input/input-csv/train_holdout.csv')
            self.test= pd.read_csv('../input/input-csv/test_holdout.csv')
        else:
            self.train = pd.read_csv('../input/input-csv/new_train.csv')
        self.target_playlists = pd.read_csv('../input/input-csv/target_playlists.csv')
        self.tracks = pd.read_csv('../input/input-csv/tracks.csv')
        self.playlists = self.get_playlists()

    def get_train_pd(self):
        """Return the training interactions DataFrame."""
        return self.train

    def get_target_playlists_pd(self):
        """Return the target playlists DataFrame."""
        return self.target_playlists

    def get_tracks_pd(self):
        """Return the tracks DataFrame."""
        return self.tracks

    def get_ordered_target_playlists(self):
        """Return the first 5000 target playlist ids, in file order."""
        return np.array(self.target_playlists['playlist_id'])[0:5000]

    def get_unordered_target_playlists(self):
        """Return the target playlist ids after the first 5000, in file order."""
        return np.array(self.target_playlists['playlist_id'])[5000:]

    def get_tracks_inside_playlist_train(self, playlist):
        """Return the track ids of the training rows belonging to `playlist`."""
        return np.array(self.train[self.train['playlist_id']==playlist]['track_id'])

    def _sorted_unique(self, frame, column):
        """Return the sorted unique values of `column` in `frame`."""
        return np.sort(frame[column].unique())

    def get_tracks(self):
        """Return the sorted unique track ids of the tracks file."""
        return self._sorted_unique(self.tracks, 'track_id')

    def get_playlists(self):
        """Return the sorted unique playlist ids of the training data."""
        return self._sorted_unique(self.train, 'playlist_id')

    def get_target_playlists(self):
        """Return the sorted unique target playlist ids."""
        return self._sorted_unique(self.target_playlists, 'playlist_id')

    def get_artists(self):
        """Return the sorted unique artist ids of the tracks file."""
        return self._sorted_unique(self.tracks, 'artist_id')

    def get_albums(self):
        """Return the sorted unique album ids of the tracks file."""
        return self._sorted_unique(self.tracks, 'album_id')

    def get_durations(self):
        """Return the sorted unique track durations of the tracks file."""
        return self._sorted_unique(self.tracks, 'duration_sec')

    def _interaction_matrix(self, frame, row_column, col_column, n_rows, n_cols):
        """
        Build a CSR matrix with a 1 for every (row_column, col_column) pair of
        `frame`; repeated pairs are summed by the sparse constructor.

        Pairs whose row id is outside [0, n_rows) are dropped, so they do not
        leave explicit zero entries in the matrix. The ids are used directly
        as matrix indices.
        """
        row_ids = np.asarray(frame[row_column])
        col_ids = np.asarray(frame[col_column])

        in_range = (row_ids >= 0) & (row_ids < n_rows)
        row_ids = row_ids[in_range].astype(np.int64)
        col_ids = col_ids[in_range].astype(np.int64)

        ones = np.ones(row_ids.shape[0], dtype=np.float64)
        return sps.csr_matrix((ones, (row_ids, col_ids)), shape=(n_rows, n_cols))

    def get_URM_test(self):
        """
        Build, store in self.URM_test and return the playlist x track matrix of
        the test interactions. Requires the builder to be created with local=True.
        """
        self.URM_test = self._interaction_matrix(
            self.test, 'playlist_id', 'track_id',
            len(self.get_playlists()), len(self.get_tracks()))
        return self.URM_test

    def get_URM_train(self):
        """
        Build, store in self.URM_train and return the playlist x track matrix
        of the training interactions.
        """
        self.URM_train = self._interaction_matrix(
            self.train, 'playlist_id', 'track_id',
            len(self.get_playlists()), len(self.get_tracks()))
        return self.URM_train

    def get_URM_transpose_train(self):
        """
        Build, store in self.URM_train_transpose and return the track x playlist
        matrix of the training interactions.
        """
        self.URM_train_transpose = self._interaction_matrix(
            self.train, 'track_id', 'playlist_id',
            len(self.get_tracks()), len(self.get_playlists()))
        return self.URM_train_transpose

    def _one_hot_feature(self, feature_column, classes):
        """
        Return a CSR matrix with one row per track (sorted by track_id) and one
        column per value of `classes`, with a 1 on the track's feature value.
        """
        frame = self.tracks.reindex(columns=['track_id', feature_column])
        frame.sort_values(by='track_id', inplace=True)
        labels = [[value] for value in frame[feature_column]]
        encoded = MultiLabelBinarizer(classes=classes, sparse_output=True).fit_transform(labels)
        return encoded.tocsr()

    def get_ICM(self, a):
        """
        Return the item-content matrix: the artist one-hot columns, multiplied
        by `a`, stacked horizontally with the album one-hot columns.
        :param a: weight applied to the artist features
        """
        icm_artists_csr = self._one_hot_feature('artist_id', self.get_artists())
        icm_albums_csr = self._one_hot_feature('album_id', self.get_albums())

        return sps.hstack((a*icm_artists_csr,icm_albums_csr))



In [None]:
import random
import numpy as np
class HybridRecommender(object):
    """
    Combines a content-based item similarity, a collaborative item similarity
    and a user-based (playlist) similarity into a single top-10 recommendation.

    Scores are kept negated: the more negative a score, the better the track.
    """

    def __init__(self, contentSimilarity, collaborativeSimilarity, userbasedsimilarity, a, b, c, builder):
        """
        :param contentSimilarity:       sparse item-item similarity, read through indptr/indices/data
        :param collaborativeSimilarity: sparse item-item similarity, read through indptr/indices/data
        :param userbasedsimilarity:     sparse playlist-playlist similarity, read through indptr/indices/data
        :param a:       weight of the content-based scores
        :param b:       weight of the collaborative scores
        :param c:       weight of the user-based scores
        :param builder: object exposing URM_train_transpose, i.e. its
                        get_URM_transpose_train() must already have been called
        """
        self.a = a
        self.b = b
        self.c = c
        self.userbasedsimilarity = userbasedsimilarity
        self.contentSimilarity = contentSimilarity
        self.collaborativeSimilarity = collaborativeSimilarity
        self.URM_transpose =  builder.URM_train_transpose
        # Number of recommend1 calls done so far, minus one
        self.cont = -1


    @staticmethod
    def _rescale(scores, minimum, weight):
        """
        Rescale the negated scores so that the most negative value recorded
        while accumulating (`minimum`) maps to -weight.

        When `minimum` is 0 no candidate ever had a positive accumulated
        similarity: dividing by it would produce inf/nan (and rank candidates
        with negative similarity first), so every candidate gets a neutral 0.
        """
        if minimum == 0:
            return {key: 0.0 for key in scores}
        return {key: weight*value/minimum*(-1) for key, value in scores.items()}


    def calculate_rankings(self, matrix, tracks,weight):
        """
        Score the tracks similar to the ones of a playlist.

        :param matrix:  sparse similarity whose row t lists the tracks similar to t
        :param tracks:  track ids of the playlist, the candidates exclude them
        :param weight:  scale of the returned scores
        :return:        dict candidate track -> negated score in [-weight, ...]
        """
        tracksSet = set(tracks)
        best = {}
        temp = 1
        minimum = 0
        q = 1/(len(tracks)+1)**2
        for track in tracks:
            row_start = matrix.indptr[track]
            row_end = matrix.indptr[track+1]
            candidates = matrix.indices[row_start:row_end]
            similarities = matrix.data[row_start:row_end]
            for candidate, similarity in zip(candidates, similarities):
                if candidate in tracksSet:
                    continue
                if candidate in best:
                    best[candidate] = best[candidate]-similarity*temp
                else:
                    best[candidate] = -1*similarity*temp
                minimum = min(minimum, best[candidate])
            # Later tracks of the playlist weigh slightly less, but only during
            # the first 5000 recommend1 calls.
            # NOTE(review): presumably 5000 is the number of ordered target
            # playlists, which are evaluated first - confirm against the caller
            if self.cont < 5000:
                temp -= q
        return self._rescale(best, minimum, weight)


    def userbased_calculate_ratings(self, playlist, tracks, matrix, weight, URM_transpose):
        """
        Score candidate tracks by the similarity of the playlists containing them.

        :param playlist:        playlist the recommendation is for
        :param tracks:          iterable of candidate track ids
        :param matrix:          sparse playlist-playlist similarity
        :param weight:          scale of the returned scores
        :param URM_transpose:   sparse track x playlist matrix
        :return:                dict track -> negated score, only for the tracks
                                contained in at least one similar playlist
        """
        best = {}
        minimum = 0
        row_start = matrix.indptr[playlist]
        row_end = matrix.indptr[playlist+1]
        neighbours = matrix.indices[row_start:row_end]
        values = dict(zip(neighbours, matrix.data[row_start:row_end]))
        similarPlaylists = set(neighbours)
        for track in tracks:
            row_start = URM_transpose.indptr[track]
            row_end = URM_transpose.indptr[track+1]
            playlistsForTrack = set(URM_transpose.indices[row_start:row_end])
            first = True
            for p in similarPlaylists & playlistsForTrack:
                if first:
                    best[track] = -1*values[p]
                    first = False
                else:
                    best[track] = best[track]-values[p]
            if track in best:
                minimum = min(minimum, best[track])
        return self._rescale(best, minimum, weight)


    def recommend1(self, playlist, builder):
        """
        Return the 10 best track ids for `playlist`, best first.

        :param playlist:    playlist id
        :param builder:     object exposing get_tracks_inside_playlist_train
        :return:            object array with 10 track ids; when fewer than 11
                            candidates exist the list is padded with random ids
        """
        self.cont = self.cont + 1
        tracks = builder.get_tracks_inside_playlist_train(playlist)
        content_ratings = self.calculate_rankings(self.contentSimilarity, tracks, self.a)
        best = self.calculate_rankings(self.collaborativeSimilarity, tracks, self.b)

        # Merge the content scores into the collaborative ones
        for k, content_score in content_ratings.items():
            if k in best:
                best[k] = best[k] + content_score
            else:
                best[k] = content_score

        # The user-based score is computed only for the candidates found so far
        userbased_ratings = self.userbased_calculate_ratings(playlist, best, self.userbasedsimilarity, self.c, self.URM_transpose)
        for k in userbased_ratings:
            if k in best:
                best[k] = best[k] + userbased_ratings[k]

        preSorted = [[v, k] for k,v in best.items()]
        # At least 11 rows are needed by argpartition(10)
        best = np.empty((max(11,len(preSorted)), 2), dtype=object)
        for i in range(len(preSorted)):
            best[i] = preSorted[i]
        if len(preSorted) < 11:
            # Pad with random track ids having a neutral score
            for i in range(len(preSorted), 11):
                best[i] = [0, random.randint(0, 20000)]

        # Scores are negated: the 10 smallest are the best ones
        best = best[best[:,0].argpartition(10)][0:10]
        best = best[best[:,0].argsort()][:,1]
        return best


In [None]:
import numpy as np
import scipy.sparse as sps



def precision(is_relevant, relevant_items):
    """
    Fraction of the recommended items that are relevant.

    :param is_relevant:     boolean sequence, one flag per recommended item
    :param relevant_items:  unused, kept for a uniform metric signature
    :return:                number of hits divided by the number of recommendations
    """
    n_recommended = len(is_relevant)
    n_hits = np.sum(is_relevant, dtype=np.float32)

    return n_hits / n_recommended



def recall(is_relevant, relevant_items):
    """
    Fraction of the relevant items that have been recommended.

    :param is_relevant:     boolean sequence, one flag per recommended item
    :param relevant_items:  array of the relevant items
    :return:                number of hits divided by the number of relevant items
    """
    n_relevant = relevant_items.shape[0]
    n_hits = np.sum(is_relevant, dtype=np.float32)

    return n_hits / n_relevant



def MAP(is_relevant, relevant_items):
    """
    Average precision of a single recommendation list.

    :param is_relevant:     boolean array, one flag per recommended item, in ranking order
    :param relevant_items:  array of the relevant items
    :return:                sum of the precision at every hit position, divided by
                            the smaller between the number of relevant items and
                            the number of recommendations
    """
    n_recommended = is_relevant.shape[0]

    # Hits found up to each position: dividing by the position gives precision at 1, at 2, ...
    hits_so_far = np.cumsum(is_relevant, dtype=np.float32)
    positions = np.arange(1, n_recommended + 1)

    # Only the positions holding a relevant item contribute
    precision_at_hits = is_relevant * hits_so_far / positions

    normaliser = min(relevant_items.shape[0], n_recommended)

    return np.sum(precision_at_hits) / normaliser



def evaluate_algorithm(URM_test, recommender_object, builder):
    """
    Evaluate a recommender on the target playlists having test interactions.

    The ordered target playlists are evaluated first, then the unordered ones;
    playlists without test items are skipped.

    :param URM_test:            playlist x track matrix of the held-out interactions
    :param recommender_object:  object exposing recommend1(playlist_id, builder)
    :param builder:             object exposing get_ordered_target_playlists()
                                and get_unordered_target_playlists()
    :return:                    dict with the average "precision", "recall" and
                                "MAP"; all 0.0 when no playlist could be evaluated
    """

    cumulative_precision = 0.0
    cumulative_recall = 0.0
    cumulative_MAP = 0.0

    num_eval = 0

    URM_test = sps.csr_matrix(URM_test)

    playlist_groups = (builder.get_ordered_target_playlists(),
                       builder.get_unordered_target_playlists())

    for group_index, playlists in enumerate(playlist_groups):

        for position, user_id in enumerate(playlists):

            start_pos = URM_test.indptr[user_id]
            end_pos = URM_test.indptr[user_id+1]

            if end_pos-start_pos <= 0:
                continue

            relevant_items = URM_test.indices[start_pos:end_pos]

            recommended_items = recommender_object.recommend1(user_id,builder)
            num_eval+=1

            is_relevant = np.isin(recommended_items, relevant_items, assume_unique=True)

            cumulative_precision += precision(is_relevant, relevant_items)
            cumulative_recall += recall(is_relevant, relevant_items)
            cumulative_MAP += MAP(is_relevant, relevant_items)

            # Show a sample for the first ordered playlist only
            if group_index == 0 and position == 0:
                print(recommended_items)
                print(relevant_items)

    # With nothing evaluated the averages are left at 0 instead of dividing by zero
    if num_eval > 0:
        cumulative_precision /= num_eval
        cumulative_recall /= num_eval
        cumulative_MAP /= num_eval

    print("Recommender performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.4f}".format(
        cumulative_precision, cumulative_recall, cumulative_MAP))

    result_dict = {
        "precision": cumulative_precision,
        "recall": cumulative_recall,
        "MAP": cumulative_MAP,
    }

    return result_dict


In [None]:
# True: evaluate offline on the hold-out split; False: use the full training file
local = True

# Load the CSV files and build the matrices shared by the cells below
b = Builder(local)
ICM = b.get_ICM(0.7)  # 0.7 is the weight given to the artist features
URM_train_transpose = b.get_URM_transpose_train()
URM_train = b.get_URM_train()

# The test matrix exists only for the hold-out split
if local:
    URM_test = b.get_URM_test()

In [None]:
def compute_similarity():
    """
    Compute the three similarity matrices used by the hybrid recommender.

    Every matrix is obtained from ItemCBFKNNRecommender.fit, changing the
    feature matrix: the ICM (content), the transposed URM (collaborative)
    and the URM itself (user based).

    :return: (contentSimilarity, collaborativeSimilarity, userbasedSimilarity)
    """
    def fit_on(feature_matrix, shrink, topK):
        # A new recommender is created for every feature matrix
        return ItemCBFKNNRecommender(URM_train, feature_matrix).fit(shrink=shrink, topK=topK)

    contentSimilarity = fit_on(ICM, 0.0, 200)
    collaborativeSimilarity = fit_on(URM_train_transpose, 5.0, 500)
    userbasedSimilarity = fit_on(URM_train, 5.0, 200)

    return contentSimilarity, collaborativeSimilarity, userbasedSimilarity

def run_local(a, a1, a2):
    """
    Evaluate on the local test split a hybrid recommender with the given weights.

    :param a:   content weight
    :param a1:  collaborative weight
    :param a2:  user-based weight
    :return:    the dict returned by evaluate_algorithm
    """
    hybrid = HybridRecommender(contentSimilarity, collaborativeSimilarity,
                               userbasedSimilarity, a, a1, a2, b)
    return evaluate_algorithm(URM_test, hybrid, b)

def run_online(a, a1, a2):
    """
    Write the submission file for a hybrid recommender with the given weights
    (content, collaborative, user based).
    """
    print_to_csv(contentSimilarity,
                 collaborativeSimilarity,
                 userbasedSimilarity,
                 a, a1, a2)
    
def run(a, a1, a2):
    """
    Run the local evaluation when the global `local` flag is True, otherwise
    write the submission file. Nothing is returned in either case.
    """
    action = run_local if local == True else run_online
    action(a, a1, a2)

def print_to_csv(contentSimilarity, collaborativeSimilarity, userbasedSimilarity, a, a1, a2):
    """
    Append to "hybrid-submission.csv" a header and one line for every target
    playlist, made of the playlist id and its space-separated recommended tracks.

    The ordered target playlists are written first, then the unordered ones.

    NOTE: the file is opened in append mode, so running this more than once
    adds a new header and a new set of lines to the existing file.

    :param contentSimilarity:       content-based item similarity
    :param collaborativeSimilarity: collaborative item similarity
    :param userbasedSimilarity:     playlist similarity
    :param a:   content weight
    :param a1:  collaborative weight
    :param a2:  user-based weight
    """
    recommender = HybridRecommender(contentSimilarity, collaborativeSimilarity, userbasedSimilarity, a, a1, a2, b)

    # The context manager closes (and flushes) the file even if a recommendation fails
    with open("hybrid-submission.csv",'a') as file:
        file.write("playlist_id,track_ids"+"\n")

        for playlists in (b.get_ordered_target_playlists(), b.get_unordered_target_playlists()):
            for playlist in playlists:
                recommended = recommender.recommend1(playlist,b)
                # Join the ids explicitly: the string form of a numpy array depends
                # on the numpy version and wraps long lines
                s = " ".join(str(track) for track in recommended)
                file.write(str(playlist)+","+s+"\n")

In [None]:
 # Compute once the three similarity matrices, they are read as globals by run_local / run_online
 contentSimilarity, collaborativeSimilarity, userbasedSimilarity = compute_similarity()

In [None]:
# Grid search on the content weight (0.1 + a*0.05) and on the user-based
# weight (0.1 + a2*0.07), the collaborative weight is fixed to 1
for a in range(5):
    for a2 in range(10):
        print("{} {}".format(a, a2))
        print(run_local(0.1+a*0.05, 1, 0.1+a2*0.07))