In [None]:
import os
import spotipy
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.spatial.distance import cdist

# Distance matrices

To build a network that we can traverse along an ideal path, we first need to determine the distance between every pair of songs, based on the features we previously extracted from them.

From there, we'll be able to determine an ideal path between them.

In [None]:
# Identifier of the playlist to analyse, read from the PLAYLIST_ID environment
# variable (raises KeyError if it is not set). It is used below to locate the
# input features CSV and to name the saved distance matrix.
playlist_id = os.environ["PLAYLIST_ID"]

In [None]:
# Load the per-track feature table for this playlist. The first CSV column
# becomes the index; it is used further down as the track IDs sent to Spotify.
features_csv = f"./data/{playlist_id}_features.csv"
df = pd.read_csv(features_csv, index_col=0)

df

In [None]:
# Euclidean distance between every pair of rows of the feature table:
# distance_matrix[i, j] is the distance from row i to row j.
feature_vectors = df.values
distance_matrix = cdist(feature_vectors, feature_vectors, metric="euclidean")

In [None]:
# Visualise the pairwise distances. The figure is titled and labelled so it
# can be understood on its own when the notebook is skimmed.
ax = sns.heatmap(distance_matrix, cbar_kws={"label": "Euclidean distance"})
ax.set(
    title="Pairwise track distances",
    xlabel="Track (row position in features table)",
    ylabel="Track (row position in features table)",
);

# Outliers

In [None]:
# Spotify Web API client authenticated via the client-credentials manager.
# SpotifyClientCredentials() receives no arguments here, so it has to find the
# client ID/secret on its own (presumably from the environment — TODO confirm).
credentials_manager = spotipy.oauth2.SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)

Sort tracks by their 'outlierness'. The tracks with the highest cumulative distance from the rest of the set should be the 'most different'.

In [None]:
# Sum each row of the distance matrix to get a track's total distance to all
# tracks, then order the row positions from smallest total to largest.
total_distance = distance_matrix.sum(axis=1)
sorted_indexes = np.argsort(total_distance)
sorted_indexes

In [None]:
# Reorder the table's index values (the track IDs) by ascending total distance.
track_ids = df.index.values
sorted_track_ids = track_ids[sorted_indexes]

### most alike

In [None]:
# The first five IDs have the smallest total distance to the rest of the set.
# Look each one up on Spotify and print its first-listed artist and its title.
most_alike_ids = sorted_track_ids[:5]
for track_id in most_alike_ids:
    track = sp.track(track_id)
    artist_name = track["artists"][0]["name"]
    track_name = track["name"]
    print(f"{artist_name}\n  {track_name}\n")

### most different

In [None]:
# Walk the ordering backwards and take the first five: the tracks with the
# largest total distance, most extreme first. Look each one up on Spotify and
# print its first-listed artist and its title.
most_different_ids = sorted_track_ids[::-1][:5]
for track_id in most_different_ids:
    track = sp.track(track_id)
    artist_name = track["artists"][0]["name"]
    track_name = track["name"]
    print(f"{artist_name}\n  {track_name}\n")

# Save

In [None]:
# Persist the distance matrix in NumPy's .npy format next to the features CSV.
# The path already ends in ".npy", so np.save writes to exactly this file name.
distance_path = f"./data/{playlist_id}_distance.npy"
np.save(distance_path, distance_matrix)