In [1]:
import json
import pandas as pd

def load_data(verbose=False):
    """Load the training playlists from ``data/ismpd.json``.

    Args:
        verbose: When true, print the number of playlists and the first
            playlist after loading.

    Returns:
        The value stored under the top-level ``'playlists'`` key of the JSON
        document.
    """
    # A context manager closes the file handle as soon as parsing finishes
    # instead of leaving it to the garbage collector.
    with open("data/ismpd.json", encoding="utf-8") as source:
        data = json.load(source)['playlists']
    if verbose:
        print("Playlists: {}".format(len(data)))
        print(data[0])
    return data

def load_challenge(verbose=False):
    """Load the challenge playlists from ``data/challenge_set.json``.

    Args:
        verbose: When true, print the number of playlists and the first
            playlist after loading.

    Returns:
        The value stored under the top-level ``'playlists'`` key of the JSON
        document.
    """
    # A context manager closes the file handle as soon as parsing finishes
    # instead of leaving it to the garbage collector.
    with open("data/challenge_set.json", encoding="utf-8") as source:
        data = json.load(source)['playlists']
    if verbose:
        print("Playlists: {}".format(len(data)))
        print(data[0])
    return data

def get_track_uri_to_name_map(playlists=None):
    """Map every track URI found in ``playlists`` to its track name.

    Args:
        playlists: Iterable of playlist dicts, each with a ``'tracks'`` list
            whose items carry ``'track_uri'`` and ``'track_name'``.  When
            omitted (``None``) the playlists are read with ``load_data()``.

    Returns:
        A dict ``{track_uri: track_name}``.  If a URI occurs more than once,
        the name from its last occurrence wins.
    """
    # Only fall back to disk when the argument was omitted: an explicitly
    # empty list is valid input and simply yields an empty mapping.
    if playlists is None:
        playlists = load_data()
    return {
        track['track_uri']: track['track_name']
        for playlist in playlists
        for track in playlist['tracks']
    }

def get_playslist_track_uris(playlist):
    """Return the ``'track_uri'`` of every entry in ``playlist['tracks']``, in order."""
    uris = []
    for entry in playlist['tracks']:
        uris.append(entry['track_uri'])
    return uris

def get_all_tracks_uris(playlists):
    """Return every distinct track URI that appears in ``playlists``.

    Args:
        playlists: Iterable of playlist dicts, each with a ``'tracks'`` list
            whose items carry a ``'track_uri'``.

    Returns:
        A list of unique URIs in first-seen order.  Going through a bare
        ``set`` would give an order that can change between interpreter runs,
        which makes positional indices into the result unreproducible.
    """
    seen = set()
    ordered = []
    for playlist in playlists:
        for track in playlist['tracks']:
            uri = track['track_uri']
            if uri not in seen:
                seen.add(uri)
                ordered.append(uri)
    return ordered

def get_playlists_for_track(track_uri, playslists):
    """Return the ids of the playlists that contain ``track_uri``.

    Args:
        track_uri: Track URI to look for.
        playslists: Iterable of playlist dicts, each with a ``'pid'`` and a
            ``'tracks'`` list whose items carry a ``'track_uri'``.  The
            misspelled parameter name is kept so keyword callers keep working.

    Returns:
        A list of distinct ``pid`` values, in no particular order.
    """
    # Search the argument that was passed in.  The previous body read a
    # module-level ``playlists`` variable and silently ignored this parameter.
    matching = set()
    for playlist in playslists:
        # ``any`` stops at the first hit, so no per-playlist URI list is built.
        if any(track['track_uri'] == track_uri for track in playlist['tracks']):
            matching.add(playlist['pid'])
    return list(matching)
    
def item_item_sim_matrix(playlists=None):
    """Build an item-item similarity table from playlist co-occurrence.

    The similarity of tracks ``i`` and ``j`` is
    ``|P(i) & P(j)| / (|P(i)| * |P(j)|)``, where ``P(t)`` is the set of
    playlist ids (``'pid'``) whose ``'tracks'`` contain ``t``.

    Args:
        playlists: Iterable of playlist dicts, each with a ``'pid'`` and a
            ``'tracks'`` list whose items carry a ``'track_uri'``.  When
            omitted (``None``) the playlists are read with ``load_data()``.

    Returns:
        A dict mapping each track URI to a list of ``(other_uri, score)``
        tuples with ``score > 0``, sorted by descending score.  Ties keep the
        order in which the tracks were first seen in ``playlists``.
    """
    # Only fall back to disk when the argument was omitted; an empty list is
    # valid input and yields an empty table.
    if playlists is None:
        playlists = load_data()

    print("Loading playlists by track...", end="\t")
    # One pass builds both directions of the index (track -> pids and
    # pid -> tracks) instead of rescanning every playlist for every track.
    tracks = []
    playlists_by_track = {}
    tracks_by_playlist = {}
    for playlist in playlists:
        pid = playlist['pid']
        members = tracks_by_playlist.setdefault(pid, set())
        for track in playlist['tracks']:
            uri = track['track_uri']
            members.add(uri)
            if uri not in playlists_by_track:
                playlists_by_track[uri] = set()
                tracks.append(uri)
            playlists_by_track[uri].add(pid)
    # Position in first-seen order, used as the tie-breaker when sorting.
    position = {uri: index for index, uri in enumerate(tracks)}
    print("Done")

    print("Constructing similarity matrix...")
    similars = {}
    for iteration, i in enumerate(tracks):
        pl1 = playlists_by_track[i]
        # Only tracks sharing at least one playlist with ``i`` can score > 0,
        # so count co-occurrences through the playlists of ``i`` rather than
        # comparing ``i`` against every track in the data set.
        shared = {}
        for pid in pl1:
            for j in tracks_by_playlist[pid]:
                if j != i:
                    shared[j] = shared.get(j, 0) + 1
        similars_to_i = [
            (j, count / (len(pl1) * len(playlists_by_track[j])))
            for j, count in shared.items()
        ]
        similars_to_i.sort(key=lambda pair: (-pair[1], position[pair[0]]))
        similars[i] = similars_to_i
        if iteration % 100 == 0:
            print("\t{} ==> {}".format(iteration, len(similars_to_i)))
    print("Done")

    return similars

def get_artists(playlists=None):
    """Return every distinct artist name that appears in ``playlists``.

    Args:
        playlists: Iterable of playlist dicts, each with a ``'tracks'`` list
            whose items carry an ``'artist_name'``.  When omitted (``None``)
            the playlists are read with ``load_data()``.

    Returns:
        A list of unique artist names in first-seen order.
    """
    # The signature advertises ``None`` as a default, so honour it instead of
    # failing with ``TypeError`` when iterating over ``None``.
    if playlists is None:
        playlists = load_data()
    seen = set()
    artists = []
    for playlist in playlists:
        for track in playlist['tracks']:
            artist = track['artist_name']
            if artist not in seen:
                seen.add(artist)
                artists.append(artist)
    return artists

def get_artist(track_uri, playlists):
    """Return the ``'artist_name'`` of the first track whose URI is ``track_uri``.

    Playlists are scanned in order, and each playlist's ``'tracks'`` in order;
    ``None`` is returned when no track matches.
    """
    matches = (
        entry['artist_name']
        for playlist in playlists
        for entry in playlist['tracks']
        if entry['track_uri'] == track_uri
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matches, None)

def get_tracks_from_artist(artist_name, playlists):
    """Return the distinct track URIs whose ``'artist_name'`` equals ``artist_name``.

    The result is a list built from a set, so its order is unspecified.
    """
    found = {
        entry['track_uri']
        for playlist in playlists
        for entry in playlist['tracks']
        if entry['artist_name'] == artist_name
    }
    return list(found)

def explore(m, i, n=10, playlists=None):
    """Print the ``n`` most similar tracks for the ``i``-th key of ``m``.

    Args:
        m: Dict mapping a track URI to a list of ``(other_uri, score)``
            tuples, as returned by ``item_item_sim_matrix``.
        i: Position of the track of interest in ``list(m.keys())``.
        n: How many similar tracks to print.
        playlists: Playlists used to resolve track and artist names.  When
            omitted (``None``) they are read once with ``load_data()``.

    Raises:
        KeyError: If a URI to print does not occur in ``playlists``.
    """
    if playlists is None:
        playlists = load_data()

    # Index names and artists in a single pass.  Previously the data set was
    # re-read from disk and rescanned for every printed line.
    names = {}
    artists = {}
    for playlist in playlists:
        for track in playlist['tracks']:
            uri = track['track_uri']
            # Last occurrence wins for the name (as in
            # get_track_uri_to_name_map); first wins for the artist (as in
            # get_artist).
            names[uri] = track['track_name']
            artists.setdefault(uri, track['artist_name'])

    track_uri = list(m.keys())[i]
    # Resolve every line before printing so an unknown URI fails up front.
    similar = [(names[uri], artists.get(uri)) for uri, _score in m[track_uri][:n]]
    print("Top {} sims for '{}' ({}):".format(n, names[track_uri], artists.get(track_uri)))
    for similar_name, similar_artist in similar:
        print("\t{} ({})".format(similar_name, similar_artist))
    


# Load the training playlists into a notebook-level variable, then show the
# first track of the first playlist (as the cell's last expression it is
# rendered as the cell output).
playlists = load_data()
playlists[0]['tracks'][0]

{'album_name': 'Pulses',
 'album_uri': 'spotify:album:2UvU0egYoXsysWkHqQy4Lc',
 'artist_name': 'Karmin',
 'artist_uri': 'spotify:artist:4M0DLz8te9Q1lNIXBBwvfG',
 'duration_ms': 198906,
 'pos': 0,
 'track_name': 'Acapella',
 'track_uri': 'spotify:track:6RjW45KHJ6kgI2xQ1aFa52'}

In [None]:
# Reuse the playlists already loaded above instead of parsing the JSON again.
m = item_item_sim_matrix(playlists)

In [None]:
# Reuse the playlists already loaded above instead of parsing the JSON again.
artists = get_artists(playlists)

In [None]:
# Reuse the playlists already loaded above instead of parsing the JSON again.
michael_jackson_tracks = get_tracks_from_artist("Michael Jackson", playlists)
billie_jean_uri = 'spotify:track:5ChkMS8OtdzJeqyybCc9R5'

In [None]:
# ``keys`` only exists inside explore(); derive the position from the matrix
# itself so this cell runs on a fresh kernel.
explore(m, list(m.keys()).index(billie_jean_uri))

In [None]:
def get_similars_to_list(tracks_uris, item_item_matrix, limit=500):
    """Rank candidate tracks by their summed similarity to a list of seeds.

    Args:
        tracks_uris: Iterable of seed track URIs.
        item_item_matrix: Dict mapping a track URI to a list of
            ``(other_uri, score)`` tuples.
        limit: Maximum number of candidates to return.

    Returns:
        A list of at most ``limit`` ``(uri, total_score)`` tuples, sorted by
        descending total score.
    """
    items = {}
    for track_uri in tracks_uris:
        # A seed the matrix has never seen contributes nothing, rather than
        # raising KeyError and discarding the contribution of every other seed.
        for item, score in item_item_matrix.get(track_uri, ()):
            items[item] = items.get(item, 0) + score
    ranked = sorted(items.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:limit]

In [None]:
# Build recommendations for playlists 1000-1019 of the challenge set.
# Playlists without any sample tracks get an empty recommendation list.
res = {}
for playlist in load_challenge()[1000:1020]:
    pid = playlist['pid']
    if playlist['num_samples'] > 0:
        try:
            uris = [track['track_uri'] for track in playlist['tracks']]
            res[pid] = get_similars_to_list(uris, m)
        except Exception as error:
            # Stay best-effort, but report which playlist failed and why.
            # ``Exception`` (unlike a bare ``except``) lets KeyboardInterrupt
            # and SystemExit through, so the cell can still be interrupted.
            print("exception for playlist {}: {!r}".format(pid, error))
            res[pid] = []
    else:
        res[pid] = []

In [None]:
# Display the recommendations dict built in the previous cell (keyed by
# playlist 'pid').
res