In [1]:
from scipy.sparse import *
import numpy as np
import pandas as pd
import sys
import math
import recsys.recommenders as recommenders
import recsys.utility as utils
import recsys.builders as builders
import imp
import functools

In [2]:
# Re-import the project modules so that edits made on disk are picked up
# without restarting the kernel. importlib.reload is the supported replacement
# for imp.reload (the imp module is deprecated and removed in Python 3.12).
import importlib

utils = importlib.reload(utils)
builders = importlib.reload(builders)
recommenders = importlib.reload(recommenders)

In [3]:
%%time
# Load the dataset and prepare it for offline evaluation; the %%time cell magic
# reports how long the whole cell takes.
dataset = utils.Dataset.load()
# Split off a hold-out set for evaluation.
# NOTE(review): presumably test_size=1 holds out one track per playlist and only
# playlists with at least 13 tracks are eligible — confirm in utils.Dataset.
dataset.split_holdout(test_size=1, min_playlist_tracks=13)
dataset.normalize()
# Build the URM that the recommenders consume.
dataset.build_urm()

CPU times: user 1min 32s, sys: 714 ms, total: 1min 32s
Wall time: 1min 32s


In [4]:
class WeightedAverage(object):
    """Reducer that linearly combines prediction matrices with fixed weights.

    The result is ``sum(w_i * p_i)``; it is NOT divided by the sum of the
    weights, so despite the name it is a weighted sum.

    Parameters
    ----------
    weights : sequence of numbers
        One weight per prediction matrix, in the same order as the
        predictions passed to ``__call__``.
    """

    def __init__(self, weights):
        self.weights = weights

    def __call__(self, predictions):
        """Return the weighted sum of ``predictions`` as a sparse matrix.

        Parameters
        ----------
        predictions : sequence of sparse matrices
            All matrices must share the shape of the first one.

        Raises
        ------
        ValueError
            If ``predictions`` is empty, or if the number of predictions does
            not match the number of weights (``zip`` would otherwise silently
            drop the surplus entries).
        """
        predictions = list(predictions)
        weights = list(self.weights)

        if not predictions:
            raise ValueError("WeightedAverage needs at least one prediction matrix")
        if len(weights) != len(predictions):
            raise ValueError(
                "WeightedAverage got {} weights for {} predictions".format(
                    len(weights), len(predictions)))

        # Start from an all-zero sparse matrix of the right shape.
        res = csr_matrix(predictions[0].shape)

        for w, p in zip(weights, predictions):
            # Sparse matrices do not add in place; rebind explicitly.
            res = res + w * p

        return res

In [5]:
class WeightedAverageExp(object):
    """Reducer that combines prediction matrices after an element-wise power.

    Each prediction matrix has its stored (non-zero) values raised to its own
    exponent, is scaled by its weight, and the results are summed:
    ``sum(w_i * p_i ** e_i)``. The sum is not divided by the total weight.

    Parameters
    ----------
    weights : sequence of numbers
        One multiplicative weight per prediction matrix.
    exp_weights : sequence of numbers
        One exponent per prediction matrix, applied to its stored values.
    """

    def __init__(self, weights, exp_weights):
        self.weights = weights
        self.exp_weights = exp_weights

    def __call__(self, predictions):
        """Return the weighted sum of the exponentiated ``predictions``.

        The input matrices are not modified; each one is copied before its
        data is transformed.

        Raises
        ------
        ValueError
            If ``predictions`` is empty, or if the numbers of predictions,
            weights and exponents differ (``zip`` would otherwise silently
            drop the surplus entries).
        """
        predictions = list(predictions)
        weights = list(self.weights)
        exp_weights = list(self.exp_weights)

        if not predictions:
            raise ValueError("WeightedAverageExp needs at least one prediction matrix")
        if not (len(weights) == len(predictions) == len(exp_weights)):
            raise ValueError(
                "WeightedAverageExp got {} weights and {} exponents for {} predictions".format(
                    len(weights), len(exp_weights), len(predictions)))

        # Start from an all-zero sparse matrix of the right shape.
        res = csr_matrix(predictions[0].shape)

        for w, p, e in zip(weights, predictions, exp_weights):
            # Work on a copy so the caller's matrix is left untouched.
            p_copy = p.copy()
            # Only the explicitly stored entries are exponentiated.
            p_copy.data = np.power(p_copy.data, e)
            # Sparse matrices do not add in place; rebind explicitly.
            res = res + w * p_copy

        return res

In [6]:
def sum_reducer(x):
    """Fold the items of ``x`` together with ``+`` and return the total.

    The first item seeds the accumulation, so ``x`` must contain at least one
    element; any type supporting ``+`` works.
    """
    def _add(accumulated, item):
        return accumulated + item

    return functools.reduce(_add, x)

# Compute predictions

In [7]:
# Reload the dataset and prepare it with a hold-out split. These are the same
# four steps, with the same arguments, as the timed loading cell near the top
# of the notebook.
dataset = utils.Dataset.load()
# NOTE(review): presumably holds out one track per playlist, restricted to
# playlists with at least 13 tracks — confirm in utils.Dataset.split_holdout.
dataset.split_holdout(test_size=1, min_playlist_tracks=13)
dataset.normalize()
dataset.build_urm()

In [8]:
# A single URM builder instance is shared by every recommender below.
# NOTE(review): meaning of norm="pow", pow_base and pow_exp is defined in
# builders.URMBuilder — the values here are hard-coded tuning constants.
POW_urm_builder = builders.URMBuilder(norm="pow", pow_base=500, pow_exp=0.15)

# One SimilarityRecommender per similarity; the first two positional arguments
# are passed the same string (keyword form elsewhere in this notebook shows
# them to be `name` and `similarity`).
TTM_dot_rec = recommenders.SimilarityRecommender('TTM_dot', 'TTM_dot', POW_urm_builder)
TTM_cosine_rec = recommenders.SimilarityRecommender('TTM_cosine', 'TTM_cosine', POW_urm_builder)
TTM_UUM_cosine_rec = recommenders.SimilarityRecommender('TTM_UUM_cosine', 'TTM_UUM_cosine', POW_urm_builder)
SYM_ARTIST_rec = recommenders.SimilarityRecommender('SYM_ARTIST', 'SYM_ARTIST', POW_urm_builder)
SYM_ALBUM_rec = recommenders.SimilarityRecommender('SYM_ALBUM', 'SYM_ALBUM', POW_urm_builder)
SYM_OWNER_rec = recommenders.SimilarityRecommender('SYM_OWNER', 'SYM_OWNER', POW_urm_builder)
# NOTE(review): BPR_rec is created here but is not included in the `recoms`
# list used by the following cells.
BPR_rec = recommenders.BPRRecommender(name='bpr', urm_builder=POW_urm_builder)

In [9]:
# Recommenders that take part in the ensemble, in a fixed order.
recoms = [TTM_dot_rec, TTM_cosine_rec, TTM_UUM_cosine_rec, SYM_ARTIST_rec, SYM_ALBUM_rec, SYM_OWNER_rec]

In [10]:
# Fit every recommender on the current dataset (the numbers printed below are
# progress counters emitted during fitting).
for r in recoms:
    r.fit(dataset)

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


# Evaluation of the ensemble

In [11]:
# Replace each recommender with a fresh SimilarityRecommender built from the
# same name, similarity and URM builder, then fit and evaluate it individually.
# Each evaluate() call prints per-range scores, as shown in the output below.
recoms = [recommenders.SimilarityRecommender(name=r.name, similarity=r.similarity, urm_builder=r.urm_builder) for r in recoms]
for r in recoms:
    r.fit(dataset)
    r.evaluate()

TTM_dot: 0-1000 --> 0.05555000000000013
TTM_dot: 1000-2000 --> 0.05685499999999997
TTM_dot: 2000-3000 --> 0.05757555555555526
TTM_dot: 3000-4000 --> 0.05814666666666623
TTM_dot: 4000-5000 --> 0.060656666666666297
TTM_dot: 5000-6000 --> 0.060152222222222065
TTM_dot: 6000-6658 --> 0.06058726344247516
TTM_cosine: 0-1000 --> 0.052236666666666765
TTM_cosine: 1000-2000 --> 0.054293333333333374
TTM_cosine: 2000-3000 --> 0.05617444444444422
TTM_cosine: 3000-4000 --> 0.05735583333333289
TTM_cosine: 4000-5000 --> 0.059749333333333016
TTM_cosine: 5000-6000 --> 0.05961444444444431
TTM_cosine: 6000-6658 --> 0.059973465505156705
TTM_UUM_cosine: 0-1000 --> 0.053080000000000065
TTM_UUM_cosine: 1000-2000 --> 0.05444833333333336
TTM_UUM_cosine: 2000-3000 --> 0.055259999999999795
TTM_UUM_cosine: 3000-4000 --> 0.056445833333332994
TTM_UUM_cosine: 4000-5000 --> 0.05871133333333308
TTM_UUM_cosine: 5000-6000 --> 0.05945611111111103
TTM_UUM_cosine: 6000-6658 --> 0.0601436867928307
SYM_ARTIST: 0-1000 --> 0.055

In [12]:
# Ensemble of all recommenders in `recoms`, combined with sum_reducer (a plain,
# unweighted sum of the individual predictions).
ens_rec = recommenders.EnsembleRecommender('ensemble', recommenders=recoms, reducer=sum_reducer)
ens_rec.fit(dataset)
# As the last expression of the cell, the value returned by evaluate() is shown
# as the cell output.
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.07819000000000015
ensemble: 1000-2000 --> 0.08179666666666657
ensemble: 2000-3000 --> 0.08183666666666631
ensemble: 3000-4000 --> 0.08064833333333318
ensemble: 4000-5000 --> 0.0832679999999999
ensemble: 5000-6000 --> 0.08323833333333337
ensemble: 6000-6658 --> 0.08365324922399138


0.08365324922399138

# Evaluate prediction

In [13]:
# Reload the dataset for the final run. Unlike the earlier loading cells, no
# split_holdout() call is made here, so nothing is held out.
dataset = utils.Dataset.load()
dataset.normalize()
dataset.build_urm()

In [14]:
# Recreate the URM builder and recommenders with the same configuration as in
# the evaluation section, so they can be fitted on the newly loaded dataset.
POW_urm_builder = builders.URMBuilder(norm="pow", pow_base=500, pow_exp=0.15)

# One SimilarityRecommender per similarity, all sharing the builder above.
TTM_dot_rec = recommenders.SimilarityRecommender('TTM_dot', 'TTM_dot', POW_urm_builder)
TTM_cosine_rec = recommenders.SimilarityRecommender('TTM_cosine', 'TTM_cosine', POW_urm_builder)
TTM_UUM_cosine_rec = recommenders.SimilarityRecommender('TTM_UUM_cosine', 'TTM_UUM_cosine', POW_urm_builder)
SYM_ARTIST_rec = recommenders.SimilarityRecommender('SYM_ARTIST', 'SYM_ARTIST', POW_urm_builder)
SYM_ALBUM_rec = recommenders.SimilarityRecommender('SYM_ALBUM', 'SYM_ALBUM', POW_urm_builder)
SYM_OWNER_rec = recommenders.SimilarityRecommender('SYM_OWNER', 'SYM_OWNER', POW_urm_builder)
# NOTE(review): BPR_rec is created but is not part of the `recoms` list that
# feeds the ensemble.
BPR_rec = recommenders.BPRRecommender(name='bpr', urm_builder=POW_urm_builder)

In [15]:
# Recommenders that take part in the final ensemble; the order matters because
# the ensemble weights used later are positional.
recoms = [TTM_dot_rec, TTM_cosine_rec, TTM_UUM_cosine_rec, SYM_ARTIST_rec, SYM_ALBUM_rec, SYM_OWNER_rec]

In [16]:
# Fit and evaluate each recommender on the current dataset, announcing each
# one first so the progress output can be attributed.
for r in recoms:
    print("Evaluating {}".format(r))
    r.fit(dataset)
    r.evaluate()

Evaluating TTM_dot
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating TTM_cosine
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating TTM_UUM_cosine
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating SYM_ARTIST
Evaluating SYM_ALBUM
Evaluating SYM_OWNER
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


In [17]:
# Final ensemble: a WeightedAverage reducer with one hand-set weight per entry
# of `recoms`, in the same order (TTM_dot, TTM_cosine, TTM_UUM_cosine,
# SYM_ARTIST, SYM_ALBUM, SYM_OWNER).
ens_rec = recommenders.EnsembleRecommender('ensemble', recommenders=recoms, reducer=WeightedAverage([1.1, 1, 1.1, 1.45, 1.2, 1.3]))
ens_rec.fit(dataset)
# NOTE(review): the recorded output of this call is 0 — presumably because the
# dataset in use has no hold-out split to score against; confirm.
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER


0

## Output the result

In [18]:
# Convert the ensemble's prediction matrix into a dataframe.
# NOTE(review): keep_best=5 presumably keeps the 5 top-scored tracks per
# playlist and map_tracks=True maps internal indices back to track ids —
# confirm in utils.from_prediction_matrix_to_dataframe.
predictions_df = utils.from_prediction_matrix_to_dataframe(ens_rec.predictions, dataset, keep_best=5, map_tracks=True)
# Use the values of the temporary id column as the final 'playlist_id', then
# discard the temporary column.
predictions_df['playlist_id'] = predictions_df['playlist_id_tmp']
predictions_df = predictions_df.drop("playlist_id_tmp", axis=1)

In [19]:
# Flatten each playlist's sequence of track ids into one space-separated
# string, then write the table to result.csv without the index column.
predictions_df['track_ids'] = predictions_df['track_ids'].apply(
    lambda ids: ' '.join(str(track) for track in ids)
)
predictions_df.to_csv('result.csv', index=False)