In [1]:
# Wildcard import stays first so the explicit imports below win any name clash.
from scipy.sparse import *

import functools
import imp
import importlib
import math
import sys

import numpy as np
import pandas as pd

import recsys.builders as builders
import recsys.recommenders as recommenders
import recsys.utility as utils

In [2]:
# Re-import the project modules so that edits made on disk are picked up
# without restarting the kernel. importlib.reload is the supported replacement
# for the deprecated imp.reload. The reload order is kept from the original cell.
utils = importlib.reload(utils)
builders = importlib.reload(builders)
recommenders = importlib.reload(recommenders)

In [3]:
%%time
# Load the dataset, apply the hold-out split, normalize it and build the URM.
# The cell is timed because the recorded run took about 1.5 minutes.
dataset = utils.Dataset.load()
# NOTE(review): presumably holds out 1 track from playlists with at least 13
# tracks — confirm against utils.Dataset.split_holdout.
dataset.split_holdout(test_size=1, min_playlist_tracks=13)
dataset.normalize()
dataset.build_urm()

CPU times: user 1min 25s, sys: 535 ms, total: 1min 26s
Wall time: 1min 26s


In [4]:
class WeightedAverage(object):
    """Reducer that combines prediction matrices into one weighted sum.

    Despite the name, the result is not divided by the sum of the weights:
    it is ``sum(weights[k] * predictions[k])``.
    """

    def __init__(self, weights):
        """Store the weights.

        Args:
            weights: sized sequence of numeric weights, one per prediction
                matrix that will later be passed to ``__call__``.
        """
        self.weights = weights

    def __call__(self, predictions):
        """Return the weighted sum of ``predictions``.

        Args:
            predictions: non-empty sequence of same-shaped scipy sparse
                matrices, paired positionally with ``weights``.

        Returns:
            A sparse matrix holding ``sum(w * p)`` over the paired inputs.

        Raises:
            ValueError: if the number of matrices differs from the number of
                weights. Without this check ``zip`` would silently ignore the
                surplus weights or matrices.
        """
        if len(predictions) != len(self.weights):
            raise ValueError(
                "WeightedAverage got {} prediction matrices but {} weights".format(
                    len(predictions), len(self.weights)))

        # Start from an all-zero sparse matrix of the right shape.
        res = csr_matrix(predictions[0].shape)

        for w, p in zip(self.weights, predictions):
            res += w * p

        return res

In [5]:
class WeightedAverageExp(object):
    """Reducer that raises each matrix's stored values to a power, then sums.

    The result is ``sum(weights[k] * predictions[k] ** exp_weights[k])`` where
    the power is applied only to the explicitly stored (non-zero) entries.
    The result is not divided by the sum of the weights.
    """

    def __init__(self, weights, exp_weights):
        """Store the weights and exponents.

        Args:
            weights: sized sequence of numeric weights, one per matrix.
            exp_weights: sized sequence of exponents, one per matrix.
        """
        self.weights = weights
        self.exp_weights = exp_weights

    def __call__(self, predictions):
        """Return the weighted sum of the element-wise powered ``predictions``.

        Args:
            predictions: non-empty sequence of same-shaped scipy sparse
                matrices, paired positionally with ``weights`` and
                ``exp_weights``. The inputs are not modified.

        Returns:
            A sparse matrix holding ``sum(w * p**e)`` over the paired inputs.

        Raises:
            ValueError: if ``predictions``, ``weights`` and ``exp_weights`` do
                not all have the same length. Without this check ``zip`` would
                silently ignore the surplus entries.
        """
        n_pred = len(predictions)
        if n_pred != len(self.weights) or n_pred != len(self.exp_weights):
            raise ValueError(
                "WeightedAverageExp got {} prediction matrices, {} weights "
                "and {} exponents".format(
                    n_pred, len(self.weights), len(self.exp_weights)))

        # Start from an all-zero sparse matrix of the right shape.
        res = csr_matrix(predictions[0].shape)

        for w, p, e in zip(self.weights, predictions, self.exp_weights):
            # Work on a copy so the caller's matrix keeps its original values.
            p_copy = p.copy()
            p_copy.data = np.power(p_copy.data, e)
            res += w * p_copy

        return res

In [6]:
def sum_reducer(x):
    """Fold the items of ``x`` together with ``+`` and return the total.

    No initial value is supplied, so ``x`` must contain at least one item.
    """
    def _plus(left, right):
        return left + right

    return functools.reduce(_plus, x)

# Compute predictions

In [7]:
# Load the dataset again with the same hold-out split, normalization and URM
# build as the earlier timed cell.
# NOTE(review): this repeats that cell's work; confirm whether a second load is needed.
dataset = utils.Dataset.load()
dataset.split_holdout(test_size=1, min_playlist_tracks=13)
dataset.normalize()
dataset.build_urm()

In [8]:
# One URM builder (norm="pow", pow_base=500, pow_exp=0.15) shared by every
# recommender created in this cell.
POW_urm_builder = builders.URMBuilder(norm="pow", pow_base=500, pow_exp=0.15)

# One SimilarityRecommender per similarity; each one is given the same string
# for its first two arguments.
TTM_dot_rec = recommenders.SimilarityRecommender('TTM_dot', 'TTM_dot', POW_urm_builder)
TTM_cosine_rec = recommenders.SimilarityRecommender('TTM_cosine', 'TTM_cosine', POW_urm_builder)
TTM_UUM_cosine_rec = recommenders.SimilarityRecommender('TTM_UUM_cosine', 'TTM_UUM_cosine', POW_urm_builder)
SYM_ARTIST_rec = recommenders.SimilarityRecommender('SYM_ARTIST', 'SYM_ARTIST', POW_urm_builder)
SYM_ALBUM_rec = recommenders.SimilarityRecommender('SYM_ALBUM', 'SYM_ALBUM', POW_urm_builder)
SYM_OWNER_rec = recommenders.SimilarityRecommender('SYM_OWNER', 'SYM_OWNER', POW_urm_builder)
# NOTE(review): BPR_rec is created here but is not added to the `recoms` list
# built in the next cell — confirm whether it should be.
BPR_rec = recommenders.BPRRecommender(name='bpr', urm_builder=POW_urm_builder)

In [9]:
# Recommenders to fit and later combine; BPR_rec is not part of this list.
recoms = [
    TTM_dot_rec,
    TTM_cosine_rec,
    TTM_UUM_cosine_rec,
    SYM_ARTIST_rec,
    SYM_ALBUM_rec,
    SYM_OWNER_rec,
]

In [10]:
# Fit every recommender in `recoms` on the current dataset.
for r in recoms:
    r.fit(dataset)

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


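# Try ensemble with BPR (TODO: `BPR_rec` is not yet part of `recoms`, so the ensembles below combine only the six similarity recommenders)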

In [11]:
# Replace each recommender with a fresh SimilarityRecommender that has the
# same name, similarity and URM builder, then fit and evaluate every one.
recoms = [
    recommenders.SimilarityRecommender(
        name=r.name,
        similarity=r.similarity,
        urm_builder=r.urm_builder,
    )
    for r in recoms
]
for r in recoms:
    r.fit(dataset)
    r.evaluate()

TTM_dot: 0-1000 --> 0.05978000000000015
TTM_dot: 1000-2000 --> 0.06058333333333336
TTM_dot: 2000-3000 --> 0.06037777777777753
TTM_dot: 3000-4000 --> 0.06004083333333293
TTM_dot: 4000-5000 --> 0.06030933333333308
TTM_dot: 5000-6000 --> 0.05939055555555548
TTM_dot: 6000-6658 --> 0.06012766596575551
TTM_cosine: 0-1000 --> 0.06089666666666679
TTM_cosine: 1000-2000 --> 0.061430000000000075
TTM_cosine: 2000-3000 --> 0.0616311111111109
TTM_cosine: 3000-4000 --> 0.061075833333332955
TTM_cosine: 4000-5000 --> 0.06152533333333314
TTM_cosine: 5000-6000 --> 0.06063666666666659
TTM_cosine: 6000-6658 --> 0.06110343446480425
TTM_UUM_cosine: 0-1000 --> 0.06227666666666679
TTM_UUM_cosine: 1000-2000 --> 0.06058166666666674
TTM_UUM_cosine: 2000-3000 --> 0.05974999999999973
TTM_UUM_cosine: 3000-4000 --> 0.06073249999999957
TTM_UUM_cosine: 4000-5000 --> 0.06057133333333306
TTM_UUM_cosine: 5000-6000 --> 0.05961888888888877
TTM_UUM_cosine: 6000-6658 --> 0.06034995494142381
SYM_ARTIST: 0-1000 --> 0.0563233333

In [12]:
# Baseline ensemble: the recommenders' predictions are combined with sum_reducer.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=sum_reducer,
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.08757000000000006
ensemble: 1000-2000 --> 0.08715333333333314
ensemble: 2000-3000 --> 0.08458222222222171
ensemble: 3000-4000 --> 0.08369749999999997
ensemble: 4000-5000 --> 0.08357600000000015
ensemble: 5000-6000 --> 0.08253722222222248
ensemble: 6000-6658 --> 0.08314859317112282


0.08314859317112282

In [13]:
# Weighted ensemble; the weights are presumably applied in the order of
# `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1, 1, 1, 1.45, 1.2, 1]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.08709666666666668
ensemble: 1000-2000 --> 0.0872099999999997
ensemble: 2000-3000 --> 0.08450555555555501
ensemble: 3000-4000 --> 0.08363999999999999
ensemble: 4000-5000 --> 0.08367266666666688
ensemble: 5000-6000 --> 0.0826838888888892
ensemble: 6000-6658 --> 0.08316962050665913


0.08316962050665913

In [14]:
# Weighted ensemble, first weight raised to 1.1; the weights are presumably
# applied in the order of `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1.1, 1, 1, 1.45, 1.2, 1]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.08705000000000003
ensemble: 1000-2000 --> 0.08703833333333304
ensemble: 2000-3000 --> 0.0844044444444439
ensemble: 3000-4000 --> 0.08354249999999995
ensemble: 4000-5000 --> 0.08353200000000016
ensemble: 5000-6000 --> 0.08258722222222245
ensemble: 6000-6658 --> 0.08306398317813189


0.08306398317813189

In [15]:
# Weighted ensemble, last weight raised to 1.2; the weights are presumably
# applied in the order of `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1.1, 1, 1, 1.45, 1.2, 1.2]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.08745333333333336
ensemble: 1000-2000 --> 0.08698499999999973
ensemble: 2000-3000 --> 0.08437999999999951
ensemble: 3000-4000 --> 0.08366333333333333
ensemble: 4000-5000 --> 0.08359200000000017
ensemble: 5000-6000 --> 0.08273888888888913
ensemble: 6000-6658 --> 0.0832221888454995


0.0832221888454995

In [16]:
# Weighted ensemble, third weight raised to 1.1; the weights are presumably
# applied in the order of `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1.1, 1, 1.1, 1.45, 1.2, 1.2]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.08762666666666669
ensemble: 1000-2000 --> 0.08713999999999973
ensemble: 2000-3000 --> 0.08452888888888838
ensemble: 3000-4000 --> 0.08362166666666661
ensemble: 4000-5000 --> 0.08352666666666678
ensemble: 5000-6000 --> 0.08267000000000022
ensemble: 6000-6658 --> 0.08321868428957682


0.08321868428957682

In [17]:
# Weighted ensemble, last weight raised to 1.3; the weights are presumably
# applied in the order of `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1.1, 1, 1.1, 1.45, 1.2, 1.3]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ensemble: 0-1000 --> 0.0877566666666667
ensemble: 1000-2000 --> 0.08732499999999974
ensemble: 2000-3000 --> 0.08452666666666614
ensemble: 3000-4000 --> 0.08376583333333325
ensemble: 4000-5000 --> 0.08362666666666677
ensemble: 5000-6000 --> 0.08278333333333353
ensemble: 6000-6658 --> 0.08330629818764433


0.08330629818764433

# Evaluate prediction

In [18]:
# Reload the dataset without calling split_holdout (unlike the earlier load
# cells), then normalize it and build the URM.
# NOTE(review): the ensemble evaluate() calls recorded after this point all
# output 0, presumably because no hold-out test set exists — confirm.
dataset = utils.Dataset.load()
dataset.normalize()
dataset.build_urm()

In [19]:
# One URM builder (norm="pow", pow_base=500, pow_exp=0.15) shared by every
# recommender created in this cell.
POW_urm_builder = builders.URMBuilder(norm="pow", pow_base=500, pow_exp=0.15)

# One SimilarityRecommender per similarity; each one is given the same string
# for its first two arguments.
TTM_dot_rec = recommenders.SimilarityRecommender('TTM_dot', 'TTM_dot', POW_urm_builder)
TTM_cosine_rec = recommenders.SimilarityRecommender('TTM_cosine', 'TTM_cosine', POW_urm_builder)
TTM_UUM_cosine_rec = recommenders.SimilarityRecommender('TTM_UUM_cosine', 'TTM_UUM_cosine', POW_urm_builder)
SYM_ARTIST_rec = recommenders.SimilarityRecommender('SYM_ARTIST', 'SYM_ARTIST', POW_urm_builder)
SYM_ALBUM_rec = recommenders.SimilarityRecommender('SYM_ALBUM', 'SYM_ALBUM', POW_urm_builder)
SYM_OWNER_rec = recommenders.SimilarityRecommender('SYM_OWNER', 'SYM_OWNER', POW_urm_builder)
# NOTE(review): BPR_rec is created here but is not added to the `recoms` list
# built in the next cell — confirm whether it should be.
BPR_rec = recommenders.BPRRecommender(name='bpr', urm_builder=POW_urm_builder)

In [20]:
# Recommenders to fit and later combine; BPR_rec is not part of this list.
recoms = [
    TTM_dot_rec,
    TTM_cosine_rec,
    TTM_UUM_cosine_rec,
    SYM_ARTIST_rec,
    SYM_ALBUM_rec,
    SYM_OWNER_rec,
]

In [21]:
# Announce each recommender, then fit it on the current dataset and evaluate it.
for r in recoms:
    print("Evaluating {}".format(r))
    r.fit(dataset)
    r.evaluate()

Evaluating TTM_dot
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating TTM_cosine
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating TTM_UUM_cosine
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Evaluating SYM_ARTIST
Evaluating SYM_ALBUM
Evaluating SYM_OWNER
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000


In [22]:
# Weighted ensemble on the reloaded dataset; the weights are presumably
# applied in the order of `recoms` — confirm against EnsembleRecommender.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=WeightedAverage([1.1, 1, 1.1, 1.45, 1.2, 1.3]),
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER


0

In [23]:
# Convert the ensemble's prediction matrix into a DataFrame.
predictions_df = utils.from_prediction_matrix_to_dataframe(
    ens_rec.predictions,
    dataset,
    keep_best=5,
    map_tracks=True,
)
# Overwrite playlist_id with playlist_id_tmp, then remove the temporary column.
predictions_df['playlist_id'] = predictions_df['playlist_id_tmp']
predictions_df = predictions_df.drop(labels="playlist_id_tmp", axis=1)

In [24]:
# Turn each row's track ids into one space-separated string, then write the CSV.
predictions_df['track_ids'] = predictions_df['track_ids'].apply(
    lambda ids: ' '.join(str(track) for track in ids)
)
predictions_df.to_csv('results_ensemble_weights.csv', index=False)

In [25]:
# Unweighted ensemble: predictions are combined with sum_reducer.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=sum_reducer,
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER


0

In [26]:
def print_for_i(i):
    """Print row ``i`` of every recommender's predictions, largest value first.

    For each recommender in the global ``recoms``, the values held in
    ``predictions[i].data`` are sorted in descending order and printed on
    their own line.
    """
    for recommender in recoms:
        row_values = recommender.predictions[i].data
        print(sorted(row_values, reverse=True))

In [27]:
# Print the sorted prediction values of row 4 for each recommender.
print_for_i(4)

[0.2332192157917157, 0.22790387300613421, 0.21927361072975804, 0.2091130512585169, 0.20712127270612307, 0.20563427104850066, 0.20533685006761856, 0.20475238735629653, 0.18993287726518387, 0.18818062371479241]
[0.24097923640337876, 0.23967643409975545, 0.22720524434344719, 0.22394366359819212, 0.20466120340784869, 0.20174736147050401, 0.18671282174117701, 0.17913586528714201, 0.17912229671264626, 0.17717357282257207]
[0.38096959888356546, 0.35625324455408353, 0.33545580347330173, 0.32344543565266426, 0.28801785293184512, 0.25854596722820627, 0.24907810883323764, 0.24467956238604233, 0.23776085588632728, 0.23216644991869448]
[0.66353023362773689, 0.66353023362773689, 0.66353023362773689, 0.48010475152853888, 0.48010475152853888, 0.48010475152853888, 0.3325437191614799, 0.24254892801598804, 0.24254892801598804, 0.24254892801598804]
[0.53085684137188027, 0.53085684137188027, 0.34386284461346578, 0.33362469427615432, 0.33362469427615432, 0.3217459338059348, 0.31617646844439007, 0.2767667112

In [28]:
# Unweighted ensemble again: predictions are combined with sum_reducer.
ens_rec = recommenders.EnsembleRecommender(
    name='ensemble',
    recommenders=recoms,
    reducer=sum_reducer,
)
ens_rec.fit(dataset)
ens_rec.evaluate()

Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER


0

# Try ensemble without BPR

In [29]:
# Ensemble over the six similarity recommenders, combined with sum_reducer.
ens2 = recommenders.EnsembleRecommender(
    name="ens2",
    recommenders=[
        TTM_dot_rec,
        TTM_cosine_rec,
        TTM_UUM_cosine_rec,
        SYM_ARTIST_rec,
        SYM_ALBUM_rec,
        SYM_OWNER_rec,
    ],
    reducer=sum_reducer,
)
ens2.fit(dataset)
ens2.evaluate()


Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER


0

In [30]:
# Replace each recommender with a fresh SimilarityRecommender that has the
# same name, similarity and URM builder.
recoms = [
    recommenders.SimilarityRecommender(
        name=r.name,
        similarity=r.similarity,
        urm_builder=r.urm_builder,
    )
    for r in recoms
]

In [31]:
# Fit every freshly rebuilt recommender on the current dataset.
for r in recoms:
    r.fit(dataset)

In [32]:
# Weighted ensemble over all recommenders except the last one in `recoms`.
# NOTE(review): recoms[:-1] passes 5 recommenders but 6 weights are given, so
# the counts do not match — confirm which recommender/weight set was intended.
# NOTE(review): the recorded run of this cell stopped with
# "ValueError: row index 10024884 out of bounds" while computing the TTM_dot
# predictions; the cause is not visible in this notebook.
ens3 = recommenders.EnsembleRecommender(name="ens3", recommenders=recoms[:-1], reducer=WeightedAverage([0.9568473700404467, 0.5806196453661655, 0.531052288397153, 0.5829634486249624, 1.8867463476832003, 0.403196160018844]))
ens3.fit(dataset)
ens3.evaluate()

Computing predictions for TTM_dot


ValueError: row index 10024884 out of bounds

In [None]:
# Convert an ensemble's prediction matrix into a DataFrame (keep_best=5, map_tracks=True).
# NOTE(review): this reads ens_rec (created in an earlier cell), not the ens3
# built just above — confirm that this is intended.
predictions_df = utils.from_prediction_matrix_to_dataframe(ens_rec.predictions, dataset, keep_best=5, map_tracks=True)
# Overwrite playlist_id with playlist_id_tmp, then remove the temporary column.
predictions_df['playlist_id'] = predictions_df['playlist_id_tmp']
predictions_df = predictions_df.drop("playlist_id_tmp", axis=1)

In [None]:
# Turn each row's track ids into one space-separated string, then write the CSV.
predictions_df['track_ids'] = predictions_df['track_ids'].apply(
    lambda ids: ' '.join(str(track) for track in ids)
)
predictions_df.to_csv('results_ensemble_andrea.csv', index=False)