In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import scipy.sparse as sps
import scipy as sc
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# List the contents of the ../input directory so the available data files are
# visible in the cell output.
print(os.listdir("../input"))

['sample_submission.csv', 'submission.csv', 'target_playlists.csv', 'tracks.csv', 'train.csv']


In [2]:
class TopPopRecommender(object):
    """Non-personalised recommender that ranks items by global popularity.

    The popularity of an item is the number of rows of the training matrix
    that hold a strictly positive value in that item's column.
    """

    def fit(self, train):
        """Compute the popularity ranking from an interaction matrix.

        Parameters
        ----------
        train : scipy.sparse matrix, shape (n_users, n_items)
            Interaction matrix. It is kept as ``self.train`` and row-indexed
            in ``recommend`` (which reads ``.indices`` of the selected row),
            so a CSR matrix is expected.
        """
        self.train = train

        # Count, per column, how many rows have a positive entry.
        # ravel() always yields a 1-D vector, whatever the number of items.
        itemPopularity = np.asarray((train > 0).sum(axis=0)).ravel()

        # We are not interested in the popularity values themselves, only in
        # the item indices ordered from most to least popular.
        self.popularItems = np.flip(np.argsort(itemPopularity), axis=0)

    def recommend(self, user_id, at=10, remove_seen=True):
        """Return the ``at`` most popular items for a user.

        Parameters
        ----------
        user_id : int
            Row index of the user in the training matrix.
        at : int, default 10
            Maximum number of items to return.
        remove_seen : bool, default True
            When true, items stored in the user's training row are skipped.

        Returns
        -------
        str
            Item indices separated by single spaces, most popular first.
        """
        if remove_seen:
            seen_items = self.train[user_id].indices

            # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
            # Both inputs hold unique indices, hence assume_unique=True.
            unseen_items_mask = np.isin(self.popularItems, seen_items,
                                        assume_unique=True, invert=True)

            recommended_items = self.popularItems[unseen_items_mask][:at]
        else:
            recommended_items = self.popularItems[:at]

        return " ".join(str(i) for i in recommended_items)

In [3]:
# Load the training interactions as an integer array. skip_header drops the
# column-name row up front instead of parsing it as data and slicing it off
# afterwards; atleast_2d keeps the (rows, columns) layout even when the file
# holds a single data row.
data = np.atleast_2d(
    np.genfromtxt('../input/train.csv', delimiter=',', dtype=int, skip_header=1)
)


In [4]:
# Split the loaded table column-wise: column 0 is the playlist id and
# column 1 is the song id of each interaction.
playlist, song = data[:, 0], data[:, 1]

In [5]:
# Distinct ids that appear in the training interactions.
unique_playlist = list(set(playlist))
unique_songs = list(set(song))

num_playlists = len(unique_playlist)
num_songs = len(unique_songs)

# The raw ids are used directly as row/column indices below, so each axis has
# to be long enough for the largest id. Sizing by the number of distinct ids
# only works when the ids are exactly 0..n-1 and raises IndexError otherwise.
n_rows = int(playlist.max()) + 1 if num_playlists else 0
n_cols = int(song.max()) + 1 if num_songs else 0

# Binary playlist x song matrix: 1 where the song appears in the playlist.
mat = sps.lil_matrix((n_rows, n_cols))
mat[playlist, song] = 1

# CSR gives the fast row slicing the recommender relies on.
csr_mat = sps.csr_matrix(mat)

In [6]:
# Create the (still unfitted) popularity-based recommender.
top_recommender = TopPopRecommender()

In [7]:
# Learn the global popularity ranking from the training interactions.
top_recommender.fit(csr_mat)

# Target playlist ids (assumed to be a single-column CSV with a header row).
# skip_header drops the column-name row instead of parsing it as data;
# atleast_1d keeps the result iterable when the file holds a single id.
test = np.atleast_1d(
    np.genfromtxt('../input/target_playlists.csv', delimiter=',', dtype=int, skip_header=1)
)

# One [playlist_id, "track track ..."] row per target playlist.
result = []
for test_point in test:
    recommendation = top_recommender.recommend(test_point)
    result.append([test_point, recommendation])

rec = pd.DataFrame(result)
rec.to_csv("sample_submission.csv", index = False, header = ["playlist_id", "track_ids"])