In [1]:
# Star import stays first so that names bound by the imports below keep precedence.
from scipy.sparse import *

import functools
# NOTE: `imp` is deprecated (removed in Python 3.12); `importlib` is its replacement.
import imp
import importlib
import math
import sys

import numpy as np
import pandas as pd

import recsys.recommenders as recommenders
import recsys.utility as utils
import recsys.builders as builders

In [None]:
# Re-import the project modules so that edits to their source files are picked
# up without restarting the kernel. The reload order is the same as before.
# `importlib.reload` replaces `imp.reload`: the `imp` module is deprecated and
# was removed in Python 3.12.
utils = importlib.reload(utils)
builders = importlib.reload(builders)
recommenders = importlib.reload(recommenders)

In [2]:
%%time
# Load the dataset and prepare it for offline evaluation. The recorded run of
# this cell took about a minute and a half of wall time.
dataset = utils.Dataset.load()
# NOTE(review): presumably holds out 1 track per playlist, and only for
# playlists with at least 13 tracks -- confirm against Dataset.split_holdout.
dataset.split_holdout(test_size=1, min_playlist_tracks=13)
dataset.normalize()
# NOTE(review): presumably builds the user-rating matrix that the recommenders
# consume -- confirm against Dataset.build_urm.
dataset.build_urm()

CPU times: user 1min 27s, sys: 538 ms, total: 1min 28s
Wall time: 1min 28s


In [3]:
def sum_reducer(x):
    """Fold the items of `x` together with `+`, left to right.

    No initial value is supplied, so a single-item input returns that item
    unchanged and an empty input raises TypeError (from functools.reduce).
    """
    def _add(accumulated, item):
        return accumulated + item

    return functools.reduce(_add, x)

# Compute predictions

In [None]:
# NOTE(review): this rebinds `dataset` WITHOUT calling split_holdout. The cell
# has no execution count, so it was not run in the recorded session; running
# it in order would replace the holdout dataset prepared earlier.
# Presumably meant for training on the full data before writing the
# submission file -- confirm intent before using Run All.
dataset = utils.Dataset.load()
dataset.normalize()
dataset.build_urm()

In [4]:
# Every recommender below shares this URM builder (norm="pow").
POW_urm_builder = builders.URMBuilder(norm="pow", pow_base=500, pow_exp=0.15)


def _similarity_rec(tag):
    """Build a SimilarityRecommender, passing `tag` as both leading positional arguments."""
    return recommenders.SimilarityRecommender(tag, tag, POW_urm_builder)


TTM_dot_rec = _similarity_rec('TTM_dot')
TTM_cosine_rec = _similarity_rec('TTM_cosine')
TTM_UUM_cosine_rec = _similarity_rec('TTM_UUM_cosine')
SYM_ARTIST_rec = _similarity_rec('SYM_ARTIST')
SYM_ALBUM_rec = _similarity_rec('SYM_ALBUM')
SYM_OWNER_rec = _similarity_rec('SYM_OWNER')
BPR_rec = recommenders.BPRRecommender(name='bpr', urm_builder=POW_urm_builder)

In [5]:
# All base recommenders, BPR last; the order of this list is preserved.
recoms = [
    TTM_dot_rec,
    TTM_cosine_rec,
    TTM_UUM_cosine_rec,
    SYM_ARTIST_rec,
    SYM_ALBUM_rec,
    SYM_OWNER_rec,
    BPR_rec,
]

In [6]:
# Fit every base recommender on the prepared dataset. The fit calls print
# their own progress output (counters / status lines) while running.
for r in recoms:
    r.fit(dataset)

10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
10000
20000
30000
40000
50000
60000
70000
80000
90000
100000
Preparing for compute_similarity
Calling compute_similarity
Calling BPRSLIM
Loading similarity


# Try ensemble with BPR

In [7]:
# Ensemble over all base recommenders, BPR included. `sum_reducer` is passed as
# the reducer (presumably applied to the per-recommender predictions so that
# they are summed -- confirm in EnsembleRecommender).
ens_rec = recommenders.EnsembleRecommender('ensemble', recommenders=recoms, reducer=sum_reducer)
ens_rec.fit(dataset)
# Last expression of the cell, so the value returned by evaluate() is displayed.
ens_rec.evaluate()

Computing predictions for TTM_dot
TTM_dot: 0-1000 --> 0.06718000000000014
TTM_dot: 1000-2000 --> 0.06191333333333339
TTM_dot: 2000-3000 --> 0.060122222222222084
TTM_dot: 3000-4000 --> 0.05906999999999971
TTM_dot: 4000-5000 --> 0.05964533333333312
TTM_dot: 5000-6000 --> 0.06070444444444438
TTM_dot: 6000-6658 --> 0.06142485230800042
Computing predictions for TTM_cosine
TTM_cosine: 0-1000 --> 0.06833666666666684
TTM_cosine: 1000-2000 --> 0.06280166666666669
TTM_cosine: 2000-3000 --> 0.06059666666666641
TTM_cosine: 3000-4000 --> 0.06056333333333292
TTM_cosine: 4000-5000 --> 0.06062999999999986
TTM_cosine: 5000-6000 --> 0.06159333333333334
TTM_cosine: 6000-6658 --> 0.06204415740462613
Computing predictions for TTM_UUM_cosine
TTM_UUM_cosine: 0-1000 --> 0.0634133333333335
TTM_UUM_cosine: 1000-2000 --> 0.0587716666666667
TTM_UUM_cosine: 2000-3000 --> 0.05789666666666645
TTM_UUM_cosine: 3000-4000 --> 0.05797083333333295
TTM_UUM_cosine: 4000-5000 --> 0.058906666666666295
TTM_UUM_cosine: 5000-600

0.08264243516571583

# Try ensemble without BPR

In [8]:
# The same base recommenders as the full ensemble, with BPR_rec left out.
recs_without_bpr = [
    TTM_dot_rec,
    TTM_cosine_rec,
    TTM_UUM_cosine_rec,
    SYM_ARTIST_rec,
    SYM_ALBUM_rec,
    SYM_OWNER_rec,
]
ens2 = recommenders.EnsembleRecommender(
    name="ens2",
    recommenders=recs_without_bpr,
    reducer=sum_reducer,
)
ens2.fit(dataset)
# Last expression of the cell, so the value returned by evaluate() is displayed.
ens2.evaluate()


Using cached predictions for TTM_dot
Using cached predictions for TTM_cosine
Using cached predictions for TTM_UUM_cosine
Using cached predictions for SYM_ARTIST
Using cached predictions for SYM_ALBUM
Using cached predictions for SYM_OWNER
ens2: 0-1000 --> 0.0893800000000001
ens2: 1000-2000 --> 0.08263666666666644
ens2: 2000-3000 --> 0.08259222222222178
ens2: 3000-4000 --> 0.0825324999999998
ens2: 4000-5000 --> 0.08320400000000003
ens2: 5000-6000 --> 0.08380666666666682
ens2: 6000-6658 --> 0.08403374386702757


0.08403374386702757

In [None]:
# Turn the ensemble's predictions into a DataFrame (keep_best=5, map_tracks=True),
# overwrite `playlist_id` with the values of `playlist_id_tmp`, then discard the
# temporary column.
# NOTE(review): this exports `ens_rec` (the ensemble with BPR); in the recorded
# outputs evaluate() returned a higher value for `ens2` (0.0840 vs 0.0826) --
# confirm which ensemble should be submitted.
predictions_df = (
    utils.from_prediction_matrix_to_dataframe(
        ens_rec.predictions, dataset, keep_best=5, map_tracks=True
    )
    .assign(playlist_id=lambda frame: frame['playlist_id_tmp'])
    .drop("playlist_id_tmp", axis=1)
)

In [None]:
def _join_track_ids(track_ids):
    """Render an iterable of track ids as a single space-separated string."""
    return ' '.join(str(track_id) for track_id in track_ids)


# Flatten each row's track ids to text, then write the frame without the index.
predictions_df['track_ids'] = predictions_df['track_ids'].apply(_join_track_ids)
predictions_df.to_csv('results_ensemble_andrea.csv', index=False)