## Inference

In [1]:
# imports
import numpy as np
import random
import pickle

In [2]:
# name of the experiment whose artifacts (tokenizer, embeddings) are loaded further down
experiment_name = "test_03" # NOTE(review): presumably "default_hyperparams" is an alternative experiment name — confirm
# seed Python's `random` module; the random.sample preview in the tokenizer cell draws from this state,
# so its output is only repeatable on a fresh top-to-bottom run
random.seed(42)

### Load Tokenizer and Embeddings

In [3]:
# directory of the selected experiment, relative to this notebook; the tokenizer and
# embeddings pickles are read from paths built on top of it
experiment_dir = "../experiments/" + experiment_name

In [11]:
# load tokenizer
# NOTE: unpickling can execute arbitrary code; only load files from an experiment
# directory you trust.
# The `with` block closes the file handle as soon as the object is loaded
# (a bare open() inside pickle.load() leaves it open until garbage collection).
with open(experiment_dir + "/data/tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# preview five randomly chosen track names from the vocabulary
random.sample(list(tokenizer.word_index.keys()), 5)

['suit & tie (feat. jay-z)',
 'frosty the snowman',
 'players - dj smallz 732 - jersey club remix',
 'come & get it',
 'running up freestyle']

In [7]:
# search for tracks / artists
# The substring to look for is kept in its own variable so it is easy to change.
search_term = "bloc party"

# A comprehension keeps the iteration variable local. Looping with `track_name`
# at notebook level would overwrite the query variable of the same name used by
# the cells further down whenever this cell is re-run.
matching_track_names = [name for name in tokenizer.word_index.keys() if search_term in name]

for matching_name in matching_track_names:
    print(matching_name)

# make an empty result explicit instead of printing nothing
if not matching_track_names:
    print("no track names contain '{}'".format(search_term))

In [8]:
# load embeddings
# NOTE: unpickling can execute arbitrary code; only load files from an experiment
# directory you trust.
# The `with` block closes the file handle as soon as the object is loaded.
with open(experiment_dir + "/embeddings.pkl", "rb") as embeddings_file:
    embedding_weights = pickle.load(embeddings_file)
print("embeddings shape:", embedding_weights.shape)

embeddings shape: (4193, 100)


### Get top-n most similar tracks

In [9]:
# function to get top-n most similar tracks
def get_most_similar_tracks(track_name, n=10, tokenizer=tokenizer, embedding_weights=embedding_weights):
    """Print the `n` tracks whose embeddings are most cosine-similar to `track_name`.

    Parameters
    ----------
    track_name : str
        Key to look up in `tokenizer.word_index`.
    n : int, default 10
        Number of neighbours to print. Clamped to the number of eligible rows,
        and to 0 for negative values.
    tokenizer : object
        Must expose `word_index` (track name -> embedding row index) and
        `index_word` (embedding row index -> track name) mappings.
    embedding_weights : array-like
        2-D array with one embedding vector per row.

    Returns
    -------
    None
        The ranking is printed, one line per neighbour, with its similarity
        and row index.

    Raises
    ------
    KeyError
        If `track_name` is not a key of `tokenizer.word_index`.

    Notes
    -----
    The default `tokenizer` / `embedding_weights` are bound when this cell is
    executed; re-run this cell after reloading either object.
    """
    # get track embedding
    track_idx = tokenizer.word_index[track_name]
    weights = np.asarray(embedding_weights)
    track_vector = weights[track_idx, :]

    # compute cosine similarities against all rows; rows whose norm product is
    # zero (or NaN) keep the -inf placeholder instead of producing NaN, which
    # would otherwise sort to the top of the ranking
    dot_products = weights @ track_vector
    norm_products = np.linalg.norm(track_vector) * np.linalg.norm(weights, axis=1)
    similarities = np.full(dot_products.shape, -np.inf, dtype=float)
    np.divide(dot_products, norm_products, out=similarities, where=norm_products > 0)

    # eligible neighbours: finite similarity, has a name in the vocabulary, and
    # is not the query itself. Excluding the query by index (rather than by
    # dropping whatever ranks first) stays correct when another row ties with it.
    index_word = tokenizer.index_word
    n_rows = weights.shape[0]
    has_name = np.fromiter((row in index_word for row in range(n_rows)), dtype=bool, count=n_rows)
    eligible = np.isfinite(similarities) & has_name
    eligible[track_idx] = False
    candidate_idxs = np.flatnonzero(eligible)

    # clamp n so asking for more neighbours than exist does not raise
    n_results = max(0, min(n, candidate_idxs.size))

    # get most similar tracks' indices, highest similarity first; the stable sort
    # breaks ties by row index so the output is deterministic
    ranking = np.argsort(-similarities[candidate_idxs], kind="stable")[:n_results]
    most_similar_idxs = candidate_idxs[ranking]

    # print most similar tracks, along with their positions in training data
    print("top {} tracks most similar to '{}' (pos. {}):".format(n_results, track_name, track_idx))
    for idx in most_similar_idxs:
        idx = int(idx)
        print("- (sim. {:.3f}): '{}' (pos. {})".format(similarities[idx], index_word[idx], idx))

### Try it out

In [12]:
# specify track name and top n
# NOTE(review): when the notebook is run top to bottom, both values are reassigned
# by the next cell before get_most_similar_tracks is called, so this cell has no
# effect on the output shown below — consider removing it or calling the function here.
track_name = "bloc party - helicopter"
n = 10

In [21]:
# choose the query track (a name that appears in the vocabulary sample above)
track_name = "frosty the snowman"
# number of neighbours to list
n = 10
# print the n most similar tracks with their similarity scores and row positions
get_most_similar_tracks(track_name, n=n)

top 10 tracks most similar to 'frosty the snowman' (pos. 54):
- (sim. 0.757): 'sleigh ride' (pos. 558)
- (sim. 0.614): 'i saw mommy kissing santa claus' (pos. 3786)
- (sim. 0.580): 'atmosphere' (pos. 934)
- (sim. 0.572): 'got your back' (pos. 273)
- (sim. 0.563): 'hey ya!' (pos. 3051)
- (sim. 0.553): 'if you had my love' (pos. 2106)
- (sim. 0.537): 'asap' (pos. 2973)
- (sim. 0.530): 'country girl (shake it for me)' (pos. 418)
- (sim. 0.521): '4 leaf clover (feat. steve lacy)' (pos. 3238)
- (sim. 0.520): 'history' (pos. 635)
