<h1>Recommender system challenge PoliMi 2018</h1>

<b>Import dependencies</b>

In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import recommender as recommender

<b>See if we upload it correctly</b>

In [51]:
train = pd.read_csv("all/train.csv")
train.head()

Unnamed: 0,playlist_id,track_id
0,0,14301
1,0,8360
2,0,12844
3,0,18397
4,0,1220


In [52]:
playlist_list = list(np.asarray(train['playlist_id']))
track_list = list(np.asarray(train['track_id']))

In [53]:
tracks = pd.read_csv("all/tracks.csv")
tracks.head()

Unnamed: 0,track_id,album_id,artist_id,duration_sec
0,0,6306,449,167
1,1,12085,4903,185
2,2,1885,6358,201
3,3,3989,1150,263
4,4,11633,4447,96


In [54]:
all_track_list = list(np.asarray(train['track_id']))
album_list = list(np.asarray(tracks['album_id']))
artist_list = list(np.asarray(tracks['artist_id']))
duration_list = list(np.asarray(tracks['duration_sec']))

In [55]:
#number of different playlist, tracks, albums, artists
playlist_unique = list(set(playlist_list))
track_unique = list(set(track_list))
album_unique = list(set(album_list))
artist_unique = list(set(artist_list))

num_playlists = len(playlist_unique)
num_tracks = len(track_unique)
num_albums = len(album_unique)
num_artists = len(artist_unique)



<

In [56]:
import scipy.sparse as sps

URM_train = sps.coo_matrix((track_list, (playlist_list, all_track_list)))

URM_train

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 1211791 stored elements in COOrdinate format>

In [57]:
URM_train.tocsr()

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 1211791 stored elements in Compressed Sparse Row format>

In [58]:
train_test_split = 0.80

numInteractions = URM_train.nnz


train_mask = np.random.choice([True,False], numInteractions, p=[train_test_split, 1-train_test_split])
train_mask

array([ True,  True,  True, ...,  True,  True,  True])

In [61]:
track_list = np.array(track_list)
playlist_list = np.array(playlist_list)
all_track_list = np.array(all_track_list)


URM_train = sps.coo_matrix((track_list[train_mask], (playlist_list[train_mask], all_track_list[train_mask])))
URM_train = URM_train.tocsr()
URM_train

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 969218 stored elements in Compressed Sparse Row format>

In [62]:
test_mask = np.logical_not(train_mask)

URM_test = sps.coo_matrix((track_list[test_mask], (playlist_list[test_mask], all_track_list[test_mask])))
URM_test = URM_test.tocsr()
URM_test

<50446x20635 sparse matrix of type '<class 'numpy.int64'>'
	with 242573 stored elements in Compressed Sparse Row format>

<h3>Mean Average Precision</h3>

In [63]:
def MAP(recommended_items, relevant_items):
   
    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
    
    # Cumulative sum: precision at 1, at 2, at 3 ...
    p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (1 + np.arange(is_relevant.shape[0]))
    
    map_score = np.sum(p_at_k) / np.min([relevant_items.shape[0], is_relevant.shape[0]])

    return map_score

In [64]:
def evaluate_algorithm(URM_test, recommender_object, at=10):
    
    cumulative_MAP = 0.0
    
    num_eval = 0


    for user_id in playlist_unique:

        relevant_items = URM_test[playlist_id].indices
        
        if len(relevant_items)>0:
            
            recommended_items = recommender_object.recommend(playlist_id, at=at)
            num_eval+=1

            cumulative_MAP += MAP(recommended_items, relevant_items)

    cumulative_MAP /= num_eval
    
    print("Recommender performance is: MAP = {:.4f}".format(cumulative_MAP)) 

<h2>TopPop Recommender</h2>

In [65]:
class TopPopRecommender(object):

    def fit(self, URM_train):

        itemPopularity = (URM_train>0).sum(axis=0)
        itemPopularity = np.array(itemPopularity).squeeze()

        # We are not interested in sorting the popularity value,
        # but to order the items according to it
        self.popularItems = np.argsort(itemPopularity)
        self.popularItems = np.flip(self.popularItems, axis = 0)
    
    
    def recommend(self, user_id, at=10):
    
        recommended_items = self.popularItems[0:at]

        return recommended_items

<h3>Fit and test the model</h3>

In [66]:
topPopRecommender = TopPopRecommender()
topPopRecommender.fit(URM_train)

In [67]:
for playlist_id in playlist_unique[0:10]:
    print(topPopRecommender.recommend(playlist_id, at=10))

[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]
[ 8956 10848  5606 15578 17239 10496 13980  2674 18266  2272]


In [68]:
evaluate_algorithm(URM_test, topPopRecommender, at=10)

Recommender performance is: MAP = 0.0000


In [40]:
test = pd.read_csv("all/target_playlists.csv")
test.head()

Unnamed: 0,playlist_id
0,7
1,25
2,29
3,34
4,50


In [70]:
#top_recommendations
top_rec = recommender.top_pop_rec()
top_rec.create(train, 'playlist_id', 'track_id')

In [74]:
result = top_rec.recommend(test)
result

#not sure about the playlist column

Unnamed: 0,playlist,track_id,score,Rank
8956,40162.0,8956,1785,1.0
10848,,10848,1725,2.0
5606,5781.0,5606,1666,3.0
15578,,15578,1579,4.0
10496,,10496,1425,5.0
17239,,17239,1394,6.0
2674,26902.0,2674,1392,7.0
13980,,13980,1379,8.0
18266,,18266,1302,9.0
2272,23091.0,2272,1300,10.0
