# Baselines - playlist generation for known users

In [1]:
%matplotlib inline

import os, sys, time, gzip
import pickle as pkl
import numpy as np
from scipy.sparse import lil_matrix, issparse

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# from tools import calc_RPrecision_HitRate
from tools import calc_metrics, diversity, softmax

In [3]:
# Cut-off ranks (top-N) at which hit rate, novelty and diversity are reported.
TOPs = [
    5, 10, 20, 30, 50,
    100, 200, 300, 500, 700, 1000,
]

In [4]:
# Names of the supported datasets; the selected name is substituted into the
# data directory path (`data/<name>/...`).
datasets = ['aotm2011', '30music']

In [5]:
# Pick the dataset this run operates on (index into `datasets`) and display it.
dix = 0
dataset_name = datasets[dix]
dataset_name

'aotm2011'

In [6]:
def load_pkl_gz(fpath):
    """Load a single object from a gzip-compressed pickle file.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read (the bare ``pkl.load(gzip.open(...))`` form never
    closes it explicitly).

    NOTE(security): unpickling can execute arbitrary code -- only use this on
    files produced by this project's own preprocessing.
    """
    with gzip.open(fpath, 'rb') as fin:
        return pkl.load(fin)


# Directory holding the pre-split data for the selected dataset.
data_dir = 'data/%s/coldstart/setting4' % dataset_name

X = load_pkl_gz(os.path.join(data_dir, 'X.pkl.gz'))
Y_train = load_pkl_gz(os.path.join(data_dir, 'Y_train.pkl.gz'))
# Y_test is indexed as [song index, test playlist index] by the evaluation cells.
Y_test = load_pkl_gz(os.path.join(data_dir, 'Y_test.pkl.gz'))
# Mapping song id -> popularity; presumably counted on the training playlists only (TODO confirm).
song2pop_train = load_pkl_gz(os.path.join(data_dir, 'song2pop_train.pkl.gz'))

In [7]:
# Train/test playlist split. Each entry is indexed as a pair: element 0 is
# iterated as a sequence of song ids, element 1 is used as the grouping key `u`
# (presumably the user id -- TODO confirm).
# A `with` block closes the gzip handle once the pickle has been read.
# NOTE(security): pickle can execute arbitrary code; load trusted files only.
with gzip.open(os.path.join(data_dir, 'playlists_train_test_s4.pkl.gz'), 'rb') as fin:
    playlists3 = pkl.load(fin)
train_playlists = playlists3['train_playlists']
test_playlists = playlists3['test_playlists']

In [8]:
# `all_songs` is a sequence of (song id, <second field>) pairs; a song's
# position in it is its row index in the prediction vectors.
# A `with` block closes the gzip handle once the pickle has been read.
# NOTE(security): pickle can execute arbitrary code; load trusted files only.
with gzip.open(os.path.join(data_dir, 'all_songs.pkl.gz'), 'rb') as fin:
    all_songs = pkl.load(fin)
# Row index -> song id.
index2song = {ix: sid for ix, (sid, _) in enumerate(all_songs)}

In [9]:
# Song id -> row index (position of the song in `all_songs`).
song2index = dict((sid, ix) for ix, (sid, _) in enumerate(all_songs))

In [10]:
# Song id -> artist lookup, restricted to the songs of this dataset; songs
# missing from the lookup are simply left out.
# A `with` block closes the gzip handle once the pickle has been read.
# NOTE(security): pickle can execute arbitrary code; load trusted files only.
with gzip.open('data/msd/song2artist.pkl.gz', 'rb') as fin:
    _song2artist = pkl.load(fin)
song2artist = {sid: _song2artist[sid] for sid, _ in all_songs if sid in _song2artist}

In [11]:
# Group the song ids by artist; songs are visited in sorted order, so every
# artist's list is sorted as well.
artist2songs = {}

for sid in sorted(song2artist):
    artist = song2artist[sid]
    artist2songs.setdefault(artist, []).append(sid)

In [12]:
# Number of songs with a known artist | number of distinct artists.
print('{:,} | {:,}'.format(len(song2artist), len(artist2songs)))

111,993 | 15,698


In [13]:
# Artist popularity: number of song occurrences by that artist across all
# training playlists (songs without a known artist are ignored).
artist2pop = {}

for pl, _ in train_playlists:
    for sid in pl:
        if sid not in song2artist:
            continue
        aid = song2artist[sid]
        artist2pop[aid] = artist2pop.get(aid, 0) + 1

In [14]:
# Number of artists that occur in at least one training playlist.
print(len(artist2pop))

15698


In [15]:
# Song id -> genre lookup (used for the genre-diversity metric).
# A `with` block closes the gzip handle once the pickle has been read.
# NOTE(security): pickle can execute arbitrary code; load trusted files only.
with gzip.open('data/msd/song2genre.pkl.gz', 'rb') as fin:
    song2genre = pkl.load(fin)

In [16]:
# Song id -> popularity (used for the novelty metric).
# A `with` block closes the gzip handle once the pickle has been read.
# NOTE(security): pickle can execute arbitrary code; load trusted files only.
with gzip.open(os.path.join(data_dir, 'song2pop.pkl.gz'), 'rb') as fin:
    song2pop = pkl.load(fin)

### Collocated Artists - Greatest Hits (CAGH), Top 10 Artists

Compute the similarity of two artists $a_1$ and $a_2$ given a set of playlists $P$:   
$$
\text{sim}(a_1, a_2) 
= \frac{\sum_{p \in P} \delta(a_1, p) \times \delta(a_2, p)}
       {\sqrt{\sum_{p \in P} \delta(a_1, p) \times \sum_{p \in P} \delta(a_2, p)}}
$$
where
$$
\delta(a, p) 
= \begin{cases}
1, \ \text{at least one song in playlist $p$ is from artist $a$}, \\
0, \ \text{otherwise}.
\end{cases}
$$

Recommend songs according to their (log) popularity in the training set, weighted by the summed similarity between the song's artist and each of the 10 most popular artists.

In [17]:
# Sorted list of every artist that has at least one song in a training playlist.
all_artist = sorted({
    song2artist[sid]
    for pl, _ in train_playlists
    for sid in pl
    if sid in song2artist
})

In [18]:
# Artist id -> position in `all_artist` (row/column index of the artist matrices).
artist2index = dict(zip(all_artist, range(len(all_artist))))

In [19]:
# Binary artist-by-playlist incidence matrix:
# Delta[a, p] = 1 iff at least one song of training playlist p is by artist a.
Na = len(all_artist)
Np = len(train_playlists)
# `np.float` was a deprecated alias of the builtin `float` (deprecated in
# NumPy 1.20, removed in 1.24); `np.float64` is the same dtype and works on
# every NumPy version.
Delta = lil_matrix((Na, Np), dtype=np.float64)
for j in range(Np):
    pl_artist = sorted(set([song2artist[sid] for sid in train_playlists[j][0] if sid in song2artist]))
    ix = [artist2index[aid] for aid in pl_artist]
    # A playlist without any known artist contributes an all-zero column.
    if ix:
        Delta[ix, j] = 1

In [20]:
# Switch from the LIL construction format to CSR before doing arithmetic.
Delta = Delta.tocsr()
# Dsum[a]: number of training playlists that contain artist a.
Dsum = np.asarray(Delta.sum(axis=1)).ravel()
# ColloMat[a1, a2]: number of training playlists that contain both artists
# (dense Na x Na array).
ColloMat = Delta.dot(Delta.T).toarray()

# Sanity check: an artist co-occurs with itself in exactly the playlists it appears in.
assert np.allclose(np.diag(ColloMat), Dsum)

In [21]:
# Both lengths should match: one playlist count per artist.
print(len(Dsum), len(all_artist))

15698 15698


In [22]:
#type(ColloMat)

In [23]:
# Normalise the co-occurrence counts into the artist similarity
#   sim(a1, a2) = ColloMat[a1, a2] / sqrt(Dsum[a1] * Dsum[a2]).
T1 = 1. / np.sqrt(Dsum)
# NormMat[a1, a2] = T1[a1] * T1[a2]
NormMat = np.outer(T1, T1)

WeightMat = ColloMat * NormMat

In [24]:
# Evaluate the CAGH baseline. The same score vector `y_pred` is used for every
# test playlist; only the ground truth column of `Y_test` changes.
rps_cagh = []
hitrates_cagh = {top: [] for top in TOPs}
aucs_cagh = []
novelties_cagh = {top: dict() for top in TOPs}
artist_diversities_cagh = {top: [] for top in TOPs}
genre_diversities_cagh = {top: [] for top in TOPs}
np.random.seed(0)

assert Y_test.shape[1] == len(test_playlists)

# Only songs with a known artist can be scored; every other song keeps score 0.
sid_legal = [sid for sid, _ in all_songs if sid in song2artist]
aix_legal = [artist2index[song2artist[sid]] for sid in sid_legal]
pop_legal = np.asarray([song2pop_train[sid] for sid in sid_legal])
ix_legal = [song2index[sid] for sid in sid_legal]

# The 10 artists with the highest training popularity (ascending sort, take the tail).
top10_artists = sorted(artist2pop, key=lambda aid: artist2pop[aid])[-10:]
top10_artists_ix = [artist2index[aix] for aix in top10_artists]
# score(song) = log(popularity) * sum of similarities between its artist and the top-10 artists
y_pred = np.zeros(Y_test.shape[0])
y_pred[ix_legal] = np.log(pop_legal) * np.asarray([WeightMat[aix, top10_artists_ix].sum() for aix in aix_legal])

# Spread: -sum(p * log(p)) over the output of `softmax` applied to the scores.
y_pred_prob = softmax(y_pred)
spread_cagh = -np.dot(y_pred_prob, np.log(y_pred_prob))
sortix = np.argsort(-y_pred)

# The ranking `sortix` is the same for every test playlist, so novelty@top is
# computed once here instead of once per playlist inside the loop.
novelty_cagh_at_top = {top: np.mean([-np.log2(song2pop[index2song[ix]]) for ix in sortix[:top]])
                       for top in TOPs}

for j in range(Y_test.shape[1]):
    if (j + 1) % 10 == 0:
        sys.stdout.write('\r%d / %d' % (j+1, Y_test.shape[1]))
        sys.stdout.flush()
    y_true = Y_test[:, j].A.reshape(-1)

    # calc_metrics (from tools) is unpacked as R-Precision, a dict of hit rates keyed by cut-off, and AUC.
    rp, hr_dict, auc = calc_metrics(y_true, y_pred, tops=TOPs)
    rps_cagh.append(rp)
    for top in TOPs:
        hitrates_cagh[top].append(hr_dict[top])
    aucs_cagh.append(auc)

    # novelty, grouped by the second field of the test playlist entry
    u = test_playlists[j][1]
    for top in TOPs:
        nov = novelty_cagh_at_top[top]
        novelties_cagh[top].setdefault(u, []).append(nov)

    # artist/genre diversity
    # Kept inside the loop on purpose: songs with unknown artist/genre get a
    # random placeholder label drawn from the global NumPy RNG, so hoisting
    # this would change the random stream.
    for top in TOPs:
        artist_vec = np.array([song2artist[index2song[ix]] if index2song[ix] in song2artist
                               else str(np.random.rand()) for ix in sortix[:top]])
        genre_vec = np.array([song2genre[index2song[ix]] if index2song[ix] in song2genre
                              else str(np.random.rand()) for ix in sortix[:top]])
        artist_diversities_cagh[top].append( diversity(artist_vec) )
        genre_diversities_cagh[top].append( diversity(genre_vec) )

print('\n%d / %d' % (len(rps_cagh), Y_test.shape[1]))

8260 / 8260
8260 / 8260


In [25]:
# fig = plt.figure(figsize=[20, 5])
# ax1 = plt.subplot(131)
# ax1.hist(rps_cagh, bins=100)
# ax1.set_yscale('log')
# ax1.set_title('R-Precision')
# #ax.set_xlim(0, xmax)
# ax2 = plt.subplot(132)
# ax2.hist(aucs_cagh, bins=100)
# ax2.set_yscale('log')
# ax2.set_title('AUC')
# pass

In [26]:
# Collect the CAGH results for this dataset:
#   'Test'     -> metrics averaged over the test playlists
#                 (novelty: mean per group `u` first, then mean over groups)
#   'Test_All' -> the raw, un-averaged values.
cagh = {dataset_name: dict()}
cagh[dataset_name]['Test'] = {
    'R-Precision': np.mean(rps_cagh),
    'Hit-Rate': {top: np.mean(hitrates_cagh[top]) for top in TOPs},
    'AUC': np.mean(aucs_cagh),
    'Spread': spread_cagh,
    'Novelty': {t: np.mean([np.mean(nov_list) for nov_list in novelties_cagh[t].values()])
                for t in TOPs},
    'Artist-Diversity': {t: np.mean(artist_diversities_cagh[t]) for t in TOPs},
    'Genre-Diversity': {t: np.mean(genre_diversities_cagh[t]) for t in TOPs},
}
cagh[dataset_name]['Test_All'] = {
    'R-Precision': rps_cagh,
    'Hit-Rate': {top: hitrates_cagh[top] for top in TOPs},
    'AUC': aucs_cagh,
    'Spread': spread_cagh,
    'Novelty': novelties_cagh,
    'Artist-Diversity': artist_diversities_cagh,
    'Genre-Diversity': genre_diversities_cagh,
}
cagh[dataset_name]['Test']

{'R-Precision': 0.010752888364457081,
 'Hit-Rate': {5: 0.007189499596544438,
  10: 0.010737942236317412,
  20: 0.017424645772924005,
  30: 0.022505021135506318,
  50: 0.031438483108188066,
  100: 0.049118295099717,
  200: 0.06308437695173645,
  300: 0.07065403232337869,
  500: 0.0787953585064552,
  700: 0.10703869239355554,
  1000: 0.16627896028004344},
 'AUC': 0.8808180501207196,
 'Spread': 5.274721978715014,
 'Novelty': {5: -9.172677325016553,
  10: -8.881461910316037,
  20: -8.61813233378527,
  30: -8.397109599952675,
  50: -8.192052876194628,
  100: -7.848922134185842,
  200: -7.177997031143748,
  300: -6.750936363732949,
  500: -6.000442273003983,
  700: -5.637172472185597,
  1000: -5.780984125184579},
 'Artist-Diversity': {5: 0.8000000000000002,
  10: 0.6888888888888888,
  20: 0.6473684210526314,
  30: 0.6390804597701151,
  50: 0.7608163265306124,
  100: 0.8490909090909095,
  200: 0.8637688442211057,
  300: 0.8845484949832779,
  500: 0.8934428857715434,
  700: 0.9110075618230126,

In [27]:
# Persist the CAGH results, then read them back as a round-trip check.
fperf_cagh = os.path.join(data_dir, 'perf-cagh.pkl')
print(fperf_cagh)
# `with` guarantees the file is flushed and closed before it is re-opened for
# reading; the bare `open(...)` form left that to the garbage collector.
with open(fperf_cagh, 'wb') as fout:
    pkl.dump(cagh, fout)
with open(fperf_cagh, 'rb') as fin:
    cagh_saved = pkl.load(fin)
cagh_saved[dataset_name]['Test']

data/aotm2011/coldstart/setting4/perf-cagh.pkl


{'R-Precision': 0.010752888364457081,
 'Hit-Rate': {5: 0.007189499596544438,
  10: 0.010737942236317412,
  20: 0.017424645772924005,
  30: 0.022505021135506318,
  50: 0.031438483108188066,
  100: 0.049118295099717,
  200: 0.06308437695173645,
  300: 0.07065403232337869,
  500: 0.0787953585064552,
  700: 0.10703869239355554,
  1000: 0.16627896028004344},
 'AUC': 0.8808180501207196,
 'Spread': 5.274721978715014,
 'Novelty': {5: -9.172677325016553,
  10: -8.881461910316037,
  20: -8.61813233378527,
  30: -8.397109599952675,
  50: -8.192052876194628,
  100: -7.848922134185842,
  200: -7.177997031143748,
  300: -6.750936363732949,
  500: -6.000442273003983,
  700: -5.637172472185597,
  1000: -5.780984125184579},
 'Artist-Diversity': {5: 0.8000000000000002,
  10: 0.6888888888888888,
  20: 0.6473684210526314,
  30: 0.6390804597701151,
  50: 0.7608163265306124,
  100: 0.8490909090909095,
  200: 0.8637688442211057,
  300: 0.8845484949832779,
  500: 0.8934428857715434,
  700: 0.9110075618230126,

### Same Artists - Greatest Hits (SAGH), Top 10 Artists

Recommend songs by the 10 most popular artists in the training playlists, ranked by their (log) popularity in the training set; all other songs get a score of 0.

In [17]:
# Evaluate the SAGH baseline. The same score vector `y_pred` is used for every
# test playlist; only the ground truth column of `Y_test` changes.
rps_sagh = []
hitrates_sagh = {top: [] for top in TOPs}
aucs_sagh = []
novelties_sagh = {top: dict() for top in TOPs}
artist_diversities_sagh = {top: [] for top in TOPs}
genre_diversities_sagh = {top: [] for top in TOPs}
np.random.seed(0)

# Candidate songs: everything by the 10 artists with the highest training popularity.
top10_artists = sorted(artist2pop, key=lambda aid: artist2pop[aid])[-10:]
candidates = []
for aix in top10_artists:
    candidates += artist2songs[aix]
candidates = sorted(set(candidates))

assert len(candidates) > 0
# score(song) = log(popularity) for candidate songs, 0 for all others
y_pred = np.zeros(Y_test.shape[0])
for sid in candidates:
    ix = song2index[sid]
    y_pred[ix] = np.log(song2pop_train[sid])

# Spread: -sum(p * log(p)) over the output of `softmax` applied to the scores.
y_pred_prob = softmax(y_pred)
spread_sagh = -np.dot(y_pred_prob, np.log(y_pred_prob))
sortix = np.argsort(-y_pred)

# The ranking `sortix` is the same for every test playlist, so novelty@top is
# computed once here instead of once per playlist inside the loop.
novelty_sagh_at_top = {top: np.mean([-np.log2(song2pop[index2song[k]]) for k in sortix[:top]])
                       for top in TOPs}

assert Y_test.shape[1] == len(test_playlists)
for j in range(Y_test.shape[1]):
    if (j+1) % 100 == 0:
        sys.stdout.write('\r%d / %d' % (j+1, Y_test.shape[1]))
        sys.stdout.flush()
    y_true = Y_test[:, j].A.reshape(-1)

    # calc_metrics (from tools) is unpacked as R-Precision, a dict of hit rates keyed by cut-off, and AUC.
    rp, hr_dict, auc = calc_metrics(y_true, y_pred, tops=TOPs)
    rps_sagh.append(rp)
    for top in TOPs:
        hitrates_sagh[top].append(hr_dict[top])
    aucs_sagh.append(auc)

    # novelty, grouped by the second field of the test playlist entry
    u = test_playlists[j][1]
    for top in TOPs:
        nov = novelty_sagh_at_top[top]
        novelties_sagh[top].setdefault(u, []).append(nov)

    # artist/genre diversity
    # Kept inside the loop on purpose: songs with unknown artist/genre get a
    # random placeholder label drawn from the global NumPy RNG, so hoisting
    # this would change the random stream.
    for top in TOPs:
        artist_vec = np.array([song2artist[index2song[ix]] if index2song[ix] in song2artist
                               else str(np.random.rand()) for ix in sortix[:top]])
        genre_vec = np.array([song2genre[index2song[ix]] if index2song[ix] in song2genre
                              else str(np.random.rand()) for ix in sortix[:top]])
        artist_diversities_sagh[top].append( diversity(artist_vec) )
        genre_diversities_sagh[top].append( diversity(genre_vec) )

print('\n%d / %d' % (len(rps_sagh), Y_test.shape[1]))

8200 / 8260
8260 / 8260


In [18]:
# fig = plt.figure(figsize=[20, 5])
# ax1 = plt.subplot(131)
# ax1.hist(rps_sagh, bins=100)
# ax1.set_yscale('log')
# ax1.set_title('R-Precision')
# #ax.set_xlim(0, xmax)
# ax2 = plt.subplot(132)
# ax2.hist(aucs_sagh, bins=100)
# ax2.set_yscale('log')
# ax2.set_title('AUC')
# pass

In [19]:
# Collect the SAGH results for this dataset:
#   'Test'     -> metrics averaged over the test playlists
#                 (novelty: mean per group `u` first, then mean over groups)
#   'Test_All' -> the raw, un-averaged values.
sagh = {dataset_name: dict()}
sagh[dataset_name]['Test'] = {
    'R-Precision': np.mean(rps_sagh),
    'Hit-Rate': {top: np.mean(hitrates_sagh[top]) for top in TOPs},
    'AUC': np.mean(aucs_sagh),
    'Spread': spread_sagh,
    'Novelty': {t: np.mean([np.mean(nov_list) for nov_list in novelties_sagh[t].values()])
                for t in TOPs},
    'Artist-Diversity': {t: np.mean(artist_diversities_sagh[t]) for t in TOPs},
    'Genre-Diversity': {t: np.mean(genre_diversities_sagh[t]) for t in TOPs},
}
sagh[dataset_name]['Test_All'] = {
    'R-Precision': rps_sagh,
    'Hit-Rate': {top: hitrates_sagh[top] for top in TOPs},
    'AUC': aucs_sagh,
    'Spread': spread_sagh,
    'Novelty': novelties_sagh,
    'Artist-Diversity': artist_diversities_sagh,
    'Genre-Diversity': genre_diversities_sagh,
}
sagh[dataset_name]['Test']

{'R-Precision': 0.011517569953153272,
 'Hit-Rate': {5: 0.0072283314410307485,
  10: 0.01187346865392709,
  20: 0.01828525628932811,
  30: 0.023922072564621453,
  50: 0.03413255911016453,
  100: 0.04971952425464591,
  200: 0.06397562584031569,
  300: 0.07119360509683723,
  500: 0.07886738885942846,
  700: 0.08107737771784608,
  1000: 0.0836792558494555},
 'AUC': 0.5374453818064964,
 'Spread': 10.678911088450416,
 'Novelty': {5: -9.229374328190145,
  10: -9.012952998828593,
  20: -8.749442392920043,
  30: -8.600460037448288,
  50: -8.367884055947888,
  100: -7.912705479654146,
  200: -7.246027401796856,
  300: -6.778342241595182,
  500: -6.0064122659859125,
  700: -5.227158299741144,
  1000: -4.136974641536504},
 'Artist-Diversity': {5: 0.9000000000000002,
  10: 0.8888888888888892,
  20: 0.8736842105263155,
  30: 0.8965517241379307,
  50: 0.8938775510204079,
  100: 0.8868686868686866,
  200: 0.8950753768844223,
  300: 0.8934002229654401,
  500: 0.8957835671342683,
  700: 0.89552013079910

In [20]:
# Persist the SAGH results, then read them back as a round-trip check.
fperf_sagh = os.path.join(data_dir, 'perf-sagh.pkl')
print(fperf_sagh)
# `with` guarantees the file is flushed and closed before it is re-opened for
# reading; the bare `open(...)` form left that to the garbage collector.
with open(fperf_sagh, 'wb') as fout:
    pkl.dump(sagh, fout)
with open(fperf_sagh, 'rb') as fin:
    sagh_saved = pkl.load(fin)
sagh_saved[dataset_name]['Test']

data/aotm2011/coldstart/setting4/perf-sagh.pkl


{'R-Precision': 0.011517569953153272,
 'Hit-Rate': {5: 0.0072283314410307485,
  10: 0.01187346865392709,
  20: 0.01828525628932811,
  30: 0.023922072564621453,
  50: 0.03413255911016453,
  100: 0.04971952425464591,
  200: 0.06397562584031569,
  300: 0.07119360509683723,
  500: 0.07886738885942846,
  700: 0.08107737771784608,
  1000: 0.0836792558494555},
 'AUC': 0.5374453818064964,
 'Spread': 10.678911088450416,
 'Novelty': {5: -9.229374328190145,
  10: -9.012952998828593,
  20: -8.749442392920043,
  30: -8.600460037448288,
  50: -8.367884055947888,
  100: -7.912705479654146,
  200: -7.246027401796856,
  300: -6.778342241595182,
  500: -6.0064122659859125,
  700: -5.227158299741144,
  1000: -4.136974641536504},
 'Artist-Diversity': {5: 0.9000000000000002,
  10: 0.8888888888888892,
  20: 0.8736842105263155,
  30: 0.8965517241379307,
  50: 0.8938775510204079,
  100: 0.8868686868686866,
  200: 0.8950753768844223,
  300: 0.8934002229654401,
  500: 0.8957835671342683,
  700: 0.89552013079910

### Popularity-based recommendation

In [18]:
# Evaluate the popularity baseline. The same score vector `y_pred` is used for
# every test playlist; only the ground truth column of `Y_test` changes.
rps_pop = []
hitrates_pop = {top: [] for top in TOPs}
aucs_pop = []
novelties_pop = {top: dict() for top in TOPs}
artist_diversities_pop = {top: [] for top in TOPs}
genre_diversities_pop = {top: [] for top in TOPs}
np.random.seed(0)

# score(song) = its popularity in `song2pop_train`, in `all_songs` order
y_pred = np.array([song2pop_train[index2song[ix]] for ix in range(len(all_songs))])
# Spread: -sum(p * log(p)) over the output of `softmax` applied to the log-popularities.
y_pred_prob = softmax(np.log(y_pred))
spread_pop = -np.dot(y_pred_prob, np.log(y_pred_prob))
sortix = np.argsort(-y_pred)

# The ranking `sortix` is the same for every test playlist, so novelty@top is
# computed once here instead of once per playlist inside the loop.
novelty_pop_at_top = {top: np.mean([-np.log2(song2pop[index2song[ix]]) for ix in sortix[:top]])
                      for top in TOPs}

assert Y_test.shape[1] == len(test_playlists)
for j in range(Y_test.shape[1]):
    if (j+1) % 100 == 0:
        sys.stdout.write('\r%d / %d' % (j+1, Y_test.shape[1]))
        sys.stdout.flush()
    y_true = Y_test[:, j].A.reshape(-1)

    # calc_metrics (from tools) is unpacked as R-Precision, a dict of hit rates keyed by cut-off, and AUC.
    rp, hr_dict, auc = calc_metrics(y_true, y_pred, tops=TOPs)
    rps_pop.append(rp)
    for top in TOPs:
        hitrates_pop[top].append(hr_dict[top])
    aucs_pop.append(auc)

    # novelty, grouped by the second field of the test playlist entry
    u = test_playlists[j][1]
    for top in TOPs:
        nov = novelty_pop_at_top[top]
        novelties_pop[top].setdefault(u, []).append(nov)

    # artist/genre diversity
    # Kept inside the loop on purpose: songs with unknown artist/genre get a
    # random placeholder label drawn from the global NumPy RNG, so hoisting
    # this would change the random stream.
    for top in TOPs:
        artist_vec = np.array([song2artist[index2song[ix]] if index2song[ix] in song2artist
                               else str(np.random.rand()) for ix in sortix[:top]])
        genre_vec = np.array([song2genre[index2song[ix]] if index2song[ix] in song2genre
                              else str(np.random.rand()) for ix in sortix[:top]])
        artist_diversities_pop[top].append( diversity(artist_vec) )
        genre_diversities_pop[top].append( diversity(genre_vec) )

print('\n%d / %d' % (len(rps_pop), Y_test.shape[1]))

8200 / 8260
8260 / 8260


In [19]:
# fig = plt.figure(figsize=[20, 5])
# ax1 = plt.subplot(131)
# ax1.hist(rps_pop, bins=100)
# ax1.set_yscale('log')
# ax1.set_title('R-Precision')
# #ax.set_xlim(0, xmax)
# ax2 = plt.subplot(132)
# ax2.hist(aucs_pop, bins=100)
# ax2.set_yscale('log')
# ax2.set_title('AUC')
# pass

In [20]:
# Collect the popularity-baseline results for this dataset:
#   'Test'     -> metrics averaged over the test playlists
#                 (novelty: mean per group `u` first, then mean over groups)
#   'Test_All' -> the raw, un-averaged values.
pop_perf = {dataset_name: dict()}
pop_perf[dataset_name]['Test'] = {
    'R-Precision': np.mean(rps_pop),
    'Hit-Rate': {top: np.mean(hitrates_pop[top]) for top in TOPs},
    'AUC': np.mean(aucs_pop),
    'Spread': spread_pop,
    'Novelty': {t: np.mean([np.mean(nov_list) for nov_list in novelties_pop[t].values()])
                for t in TOPs},
    'Artist-Diversity': {top: np.mean(artist_diversities_pop[top]) for top in TOPs},
    'Genre-Diversity': {top: np.mean(genre_diversities_pop[top]) for top in TOPs},
}
pop_perf[dataset_name]['Test_All'] = {
    'R-Precision': rps_pop,
    'Hit-Rate': {top: hitrates_pop[top] for top in TOPs},
    'AUC': aucs_pop,
    'Spread': spread_pop,
    'Novelty': novelties_pop,
    'Artist-Diversity': artist_diversities_pop,
    'Genre-Diversity': genre_diversities_pop,
}
pop_perf[dataset_name]['Test']

{'R-Precision': 0.013474986581867915,
 'Hit-Rate': {5: 0.007437095890911096,
  10: 0.013500769773350406,
  20: 0.023683353747034324,
  30: 0.03242516552828786,
  50: 0.045383596859801874,
  100: 0.07433122144062095,
  200: 0.12010383330765882,
  300: 0.15331994733081822,
  500: 0.20927495300053606,
  700: 0.2523368469298472,
  1000: 0.30757834458153016},
 'AUC': 0.9175872878653097,
 'Spread': 10.50565527339403,
 'Novelty': {5: -9.396369039336701,
  10: -9.252264346548355,
  20: -9.090907011521443,
  30: -8.954077992930292,
  50: -8.761668669637778,
  100: -8.508119687075622,
  200: -8.213573203874882,
  300: -8.00689396789298,
  500: -7.738719831535387,
  700: -7.5509709347005876,
  1000: -7.3321758203662615},
 'Artist-Diversity': {5: 1.0,
  10: 1.0,
  20: 0.9736842105263155,
  30: 0.9747126436781612,
  50: 0.9763265306122446,
  100: 0.9854545454545458,
  200: 0.9889949748743715,
  300: 0.9905462653288738,
  500: 0.9934268537074149,
  700: 0.994138565297364,
  1000: 0.995413413413413},

In [21]:
# Persist the popularity-baseline results, then read them back as a round-trip check.
fperf_pop = os.path.join(data_dir, 'perf-pop.pkl')
print(fperf_pop)
# `with` guarantees the file is flushed and closed before it is re-opened for
# reading; the bare `open(...)` form left that to the garbage collector.
with open(fperf_pop, 'wb') as fout:
    pkl.dump(pop_perf, fout)
with open(fperf_pop, 'rb') as fin:
    pop_perf_saved = pkl.load(fin)
pop_perf_saved[dataset_name]['Test']

data/aotm2011/coldstart/setting4/perf-pop.pkl


{'R-Precision': 0.013474986581867915,
 'Hit-Rate': {5: 0.007437095890911096,
  10: 0.013500769773350406,
  20: 0.023683353747034324,
  30: 0.03242516552828786,
  50: 0.045383596859801874,
  100: 0.07433122144062095,
  200: 0.12010383330765882,
  300: 0.15331994733081822,
  500: 0.20927495300053606,
  700: 0.2523368469298472,
  1000: 0.30757834458153016},
 'AUC': 0.9175872878653097,
 'Spread': 10.50565527339403,
 'Novelty': {5: -9.396369039336701,
  10: -9.252264346548355,
  20: -9.090907011521443,
  30: -8.954077992930292,
  50: -8.761668669637778,
  100: -8.508119687075622,
  200: -8.213573203874882,
  300: -8.00689396789298,
  500: -7.738719831535387,
  700: -7.5509709347005876,
  1000: -7.3321758203662615},
 'Artist-Diversity': {5: 1.0,
  10: 1.0,
  20: 0.9736842105263155,
  30: 0.9747126436781612,
  50: 0.9763265306122446,
  100: 0.9854545454545458,
  200: 0.9889949748743715,
  300: 0.9905462653288738,
  500: 0.9934268537074149,
  700: 0.994138565297364,
  1000: 0.995413413413413},