## Inference

In [1]:
# imports
import numpy as np
import pandas as pd
import random
import pickle

In [2]:
# experiment whose saved artifacts (tokenizer, embeddings) are loaded below
# NOTE(review): "default_hyperparams" looks like an alternative/earlier value - confirm
experiment_name = "test_11"

### Load Tokenizer and Embeddings

In [3]:
# directory holding the artifacts of the experiment selected via `experiment_name`
experiment_dir = "../experiments/{}".format(experiment_name)

In [8]:
# load the fitted tokenizer and the trained embedding matrix from `experiment_dir`
# - the experiment is intentionally NOT re-assigned in this cell: `experiment_dir`
#   has already been built from `experiment_name`, so changing the name here would
#   not affect the paths below and would only leave misleading state behind
# - NOTE: pickle can execute arbitrary code on load; only open trusted artifacts
with open(experiment_dir + "/data/tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
with open(experiment_dir + "/embeddings.pkl", "rb") as embeddings_file:
    embedding_weights = pickle.load(embeddings_file)

# raw playlist data (tab-separated, first column used as index)
df = pd.read_csv('../raw_data/spotify_playlists.tsv', sep='\t', index_col=0)

# peek at a few random vocabulary entries (unseeded, so the sample varies per run)
random.sample(list(tokenizer.word_index.keys()), 5)

['spotify:track:1mb187x5w3ouqnh6p5m28y',
 'spotify:track:78qd8dvwea0gosb6fe6j3k',
 'spotify:track:2b1mcbfwrz1teox1vsm4xt',
 'spotify:track:4medno5ya2zi6imlvaprci',
 'spotify:track:6puizlqotempubfjbwywob']

In [9]:
# print every vocabulary key that contains the query substring
# NOTE(review): the sampled keys shown earlier are Spotify track URIs, so a
# free-text query such as "bloc party" may never match - confirm intended lookup
for track_name in tokenizer.word_index:
    if "bloc party" not in track_name:
        continue
    print(track_name)

### Get top-n most similar tracks

In [11]:
# function to get top-n most similar tracks
def get_most_similar_tracks(track_name, n=10, tokenizer=tokenizer, embedding_weights=embedding_weights):
    """Print the `n` tracks whose embeddings are most cosine-similar to `track_name`.

    Parameters
    ----------
    track_name : key of `tokenizer.word_index` identifying the query track.
    n : maximum number of neighbours to print; fewer are printed when fewer
        candidate rows exist.
    tokenizer : object exposing `word_index` (name -> row index) and
        `index_word` (row index -> name).
    embedding_weights : 2-D numpy array with one embedding per row.

    Returns
    -------
    None. The ranking is printed, most similar first.

    Raises
    ------
    KeyError
        If `track_name` is not in `tokenizer.word_index`.

    Notes
    -----
    The query row is excluded explicitly by its index rather than by dropping
    whatever happens to rank first, so duplicate or tied embeddings cannot
    push the query itself into the result. Rows with a zero norm (cosine
    similarity undefined) and rows without an `index_word` entry (e.g. a
    reserved padding row) are skipped instead of producing NaN rankings or a
    KeyError while printing.
    """
    # get track embedding
    track_idx = tokenizer.word_index[track_name]
    track_vector = embedding_weights[track_idx, :].reshape(1, -1)

    # compute cosine similarities against all rows; rows whose denominator is
    # zero keep the -inf placeholder instead of becoming NaN (NaN sorts last in
    # numpy and would otherwise end up at the top of the ranking)
    dot_products = np.dot(track_vector, embedding_weights.T).reshape(-1)
    denominators = np.linalg.norm(track_vector) * np.linalg.norm(embedding_weights, axis=1)
    similarities = np.full(dot_products.shape, -np.inf)
    np.divide(dot_products, denominators, out=similarities, where=denominators > 0)

    # candidate rows: finite similarity, known to the tokenizer, not the query
    n_rows = similarities.shape[0]
    is_candidate = np.zeros(n_rows, dtype=bool)
    known_idxs = [idx for idx in tokenizer.index_word if 0 <= idx < n_rows]
    is_candidate[known_idxs] = True
    is_candidate &= np.isfinite(similarities)
    is_candidate[track_idx] = False
    candidate_idxs = np.flatnonzero(is_candidate)

    # get most similar tracks' indices (clamped so argpartition never sees an
    # out-of-range k when n exceeds the number of candidates)
    k = max(0, min(n, candidate_idxs.size))
    if k > 0:
        candidate_sims = similarities[candidate_idxs]
        top = np.argpartition(candidate_sims, -k)[-k:]
        top = top[np.argsort(candidate_sims[top])][::-1]
        most_similar_idxs = candidate_idxs[top]
    else:
        most_similar_idxs = candidate_idxs[:0]

    # print most similar tracks, along with their positions in training data
    print("top {} tracks most similar to '{}' (pos. {}):".format(n, track_name, track_idx))
    for idx in most_similar_idxs:
        print("- (sim. {:.3f}): '{}' (pos. {})".format(similarities[idx], tokenizer.index_word[idx], idx))

In [12]:
# query track (a key of the tokenizer vocabulary) and number of neighbours to list
track_name, n = "spotify:track:1mb187x5w3ouqnh6p5m28y", 10
get_most_similar_tracks(track_name, n=n)

top 10 tracks most similar to 'spotify:track:1mb187x5w3ouqnh6p5m28y' (pos. 1825):
- (sim. 0.965): 'spotify:track:6iagarq2wdeyzpsy2kx0hw' (pos. 4868)
- (sim. 0.962): 'spotify:track:6nlr5kzygfaxcwuh96konw' (pos. 1850)
- (sim. 0.961): 'spotify:track:7datplicsnwrymddh1qmpc' (pos. 7981)
- (sim. 0.961): 'spotify:track:6rtug7pmjtxard0fttrlt0' (pos. 6159)
- (sim. 0.961): 'spotify:track:12yagubl6uk9e7fzopf4ji' (pos. 7842)
- (sim. 0.961): 'spotify:track:2t0rxns7lfr0e05frikoph' (pos. 5222)
- (sim. 0.960): 'spotify:track:11zulcyy4lowvcqm4oe3vj' (pos. 6096)
- (sim. 0.960): 'spotify:track:6pzzzuokovzip8feqkfd1o' (pos. 7012)
- (sim. 0.960): 'spotify:track:3h8dfh94vuzeglesojplhq' (pos. 6010)
- (sim. 0.959): 'spotify:track:58fuwmyguqkt53ypucm7cj' (pos. 3905)
