In [1]:
from code.data_utils import get_token, get_auth_header, get_artist_albums, get_album_tracks, get_artist_tracks, get_audio_features
from code.analysis import normalize_audio_feats
import pandas as pd
import numpy as np

token = get_token()
# Never echo the token itself: cell outputs are stored inside the notebook file,
# so a printed token ends up in every shared or committed copy.
# NOTE(review): the saved output of this cell also contains a line that looks like
# it is printed from inside get_token() - confirm and silence it there as well.
print("API token acquired")

[redacted: credential-like output]
[redacted: access token]


In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

# Pitch classes in circle-of-fifths order: every entry is the previous one
# plus 7, modulo 12.
CIRCLE_OF_FIFTHS = [0, 7, 2, 9, 4, 11, 6, 1, 8, 3, 10, 5]

# Position of each pitch class along the circle (pitch class -> index in the list above)
PITCH_CLASS_TO_POSITION = dict(zip(CIRCLE_OF_FIFTHS, range(len(CIRCLE_OF_FIFTHS))))

# How many pitch classes the circle holds
N_PITCH_CLASSES = len(CIRCLE_OF_FIFTHS)

# Angle in radians of each pitch class, spreading the positions evenly over a full turn
ANGLES = {
    pitch_class: (2 * np.pi * position) / N_PITCH_CLASSES
    for pitch_class, position in PITCH_CLASS_TO_POSITION.items()
}

def angle_to_vector(angle):
    """Return the 2-element NumPy array ``[cos(angle), sin(angle)]``."""
    x_component = np.cos(angle)
    y_component = np.sin(angle)
    return np.array([x_component, y_component])

# 2-D vector for every pitch class, built from its angle on the circle
PITCH_CLASS_VECTORS = {
    pitch_class: angle_to_vector(ANGLES[pitch_class])
    for pitch_class in ANGLES
}

# def cosine_similarity(vec1, vec2):
#     return np.dot(vec1, vec2) / (norm(vec1) * norm(vec2))

def normalize_cosine_similarity(cosine_similarity):
    """
    Rescale a cosine similarity from the interval [-1, 1] to [0, 1].

    The value is shifted up by one and then halved, so -1 maps to 0,
    0 maps to 0.5 and 1 maps to 1.
    """
    shifted = cosine_similarity + 1
    return shifted / 2
    

def normalize_audio_feats(feats_df, tempo_factor = 1, include_categorical=True, circle_5 = True):
    """Min-max scale the continuous audio features and optionally encode ``key``.

    Parameters
    ----------
    feats_df : pandas.DataFrame
        Must contain the nine continuous feature columns listed below and,
        when ``include_categorical`` is true, a ``key`` column. The frame is
        not modified; all work happens on a copy.
    tempo_factor : number, default 1
        Multiplier applied to ``tempo`` AFTER the scaler has been fitted, so
        the scaled tempo is expressed relative to the original tempo range
        and may fall outside [0, 1] when the factor is not 1.
    include_categorical : bool, default True
        When true, the result holds only the scaled continuous columns plus
        the encoded key columns. When false, the whole (copied) input frame
        is returned with its continuous columns scaled.
    circle_5 : bool, default True
        When true, ``key`` is encoded as the 2-D circle-of-fifths vector from
        ``PITCH_CLASS_VECTORS`` (columns ``C5_0``, ``C5_1``). Otherwise it is
        one-hot encoded over the twelve pitch classes (integer column labels).

    Returns
    -------
    pandas.DataFrame
        Normalized features carrying the same row index as ``feats_df``.

    Notes
    -----
    A key that is not one of the twelve pitch classes is encoded as an
    all-zero vector in both encodings instead of raising. Spotify's
    audio-features reference documents ``key == -1`` as "no key detected".
    """
    continuous_features = ['tempo', 'valence', 'liveness', 'instrumentalness', 'acousticness', 'speechiness', 'loudness', 'energy', 'danceability']

    # Work on a private copy so the caller's frame is never mutated.
    feats_df = feats_df.copy()

    # Fit on the untouched tempo, then rescale tempo, then transform: this keeps
    # the min/max reference identical for every tempo_factor.
    scaler = MinMaxScaler()
    scaler.fit(feats_df[continuous_features])
    feats_df['tempo'] = tempo_factor * feats_df['tempo']
    feats_df[continuous_features] = scaler.transform(feats_df[continuous_features])

    if not include_categorical:
        return feats_df

    if circle_5:
        undetected_key = np.zeros(2)
        categorical_encoded = np.array(
            [PITCH_CLASS_VECTORS.get(k, undetected_key) for k in feats_df['key'].tolist()]
        ).reshape(-1, 2)
        # Reuse the input index: a fresh RangeIndex would make the concat below
        # misalign rows (and insert NaNs) whenever feats_df is not indexed 0..n-1.
        categorical_encoded_df = pd.DataFrame(
            categorical_encoded, index=feats_df.index, columns=['C5_0', 'C5_1']
        )
    else:
        # Fix the categories to all twelve pitch classes so every frame gets the
        # same twelve columns regardless of which keys happen to occur in it;
        # unknown keys become an all-zero row.
        encoder = OneHotEncoder(
            categories=[sorted(PITCH_CLASS_VECTORS)],
            handle_unknown='ignore',
            sparse_output=False,
        )
        categorical_encoded = encoder.fit_transform(feats_df[['key']])
        categorical_encoded_df = pd.DataFrame(categorical_encoded, index=feats_df.index)

    # Combine scaled continuous features with the encoded key
    return pd.concat([feats_df[continuous_features], categorical_encoded_df], axis=1)

In [3]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

def compare_discog(audio_feats_df_1, audio_feats_df_2, include_categorical=True, circle_5 = True):
    """Cosine similarity between two audio-feature frames at three tempo scalings.

    The second frame is normalized once with a tempo factor of 1. The first
    frame is normalized with tempo factors 1, 0.5 and 2, and each version is
    compared against the normalized second frame.

    Each frame goes through its own ``normalize_audio_feats`` call, and only
    copies of the inputs are passed to it.

    Returns
    -------
    tuple
        ``(d_1, d_2, d_3)``: the ``cosine_similarity`` results for tempo
        factors 1, 0.5 and 2 respectively, first frame against second frame.
    """
    reference_feats = normalize_audio_feats(audio_feats_df_2.copy(), 1, include_categorical, circle_5)

    def similarity_at(tempo_factor):
        # Re-normalize a fresh copy of the first frame for this tempo scaling.
        scaled_feats = normalize_audio_feats(audio_feats_df_1.copy(), tempo_factor, include_categorical, circle_5)
        return cosine_similarity(scaled_feats, reference_feats)

    return similarity_at(1), similarity_at(0.5), similarity_at(2)

In [4]:
def get_album_tracks(token, album_id, summarize=False):
    """Fetch the track list of one album from the ``albums/{album_id}/tracks`` endpoint.

    Parameters
    ----------
    token
        Passed to ``get_auth_header`` to build the request headers.
    album_id
        Album identifier interpolated into the endpoint URL.
    summarize : bool, default False
        When true, return a DataFrame with the columns ``artist_name``,
        ``artist_id``, ``track_name`` and ``track_id``, using the first
        listed artist of every track. When false, return the raw ``items``
        list of the JSON response.

    Raises
    ------
    KeyError
        If the decoded response has no ``items`` entry; the message includes
        the response so the cause is visible.

    Notes
    -----
    This cell relies on ``json``, ``get`` and ``ROOT_API_URL`` already being
    defined in the kernel; none of them is imported in the visible notebook.
    NOTE(review): a single request is made, so if the endpoint paginates only
    the first page of tracks is returned - confirm against the API limits.
    """
    # Build the URL by string joining: os.path.join is for filesystem paths
    # and would insert a backslash on Windows.
    url = f"{ROOT_API_URL.rstrip('/')}/albums/{album_id}/tracks"
    result = get(url, headers=get_auth_header(token))
    js_result = json.loads(result.content)

    if 'items' not in js_result:
        raise KeyError(f"'items' missing from album-tracks response for album {album_id!r}: {js_result}")
    tracks_info = js_result['items']

    if not summarize:
        return tracks_info

    # One flat record per track; column order matches artist_* then track_*.
    summary_rows = [
        {
            'artist_name': track['artists'][0]['name'],
            'artist_id': track['artists'][0]['id'],
            'track_name': track['name'],
            'track_id': track['id'],
        }
        for track in tracks_info
    ]
    return pd.DataFrame(summary_rows)

In [5]:
#Radiohead: https://open.spotify.com/artist/4Z8W4fKeB5YxbusRsdQVPb?si=blakDesXQYWlHpnLqdDwkQ
#Arcade Fire: https://open.spotify.com/artist/3kjuyTCjPG1WMFCiyc5IuB?si=2cnIfg3QS2WuK0QYrsqupA
#Future Islands: https://open.spotify.com/artist/1WvvwcQx0tj6NdDhZZ2zZz?si=ig_wF6OVTZSj6XOKsrdx_A
#DM - https://open.spotify.com/artist/762310PdDnwsDxAQxzQkfX?si=rJ18xcQdS_2drHgcSukRDg
#Kraftwerk: https://open.spotify.com/artist/0dmPX6ovclgOy8WWJaFEUU?si=rNgVC61lRdKrfVz88pvSZw

#Modest Mouse: https://open.spotify.com/artist/1yAwtBaoHLEDWAnWR87hBT?si=dlOVIWNVR8G8F-6qdePosw
#Police - https://open.spotify.com/artist/5NGO30tJxFlKixkPSgXcFE?si=rUyliCBOS8W8Cbh94AV1OA
#Beatles - https://open.spotify.com/artist/3WrFJ7ztbogyGnTHbHJFl2?si=PJiFfighQlOsD0pJXrZSLw

# First artist to compare (the Beatles id from the list above)
artist_id_1 = '3WrFJ7ztbogyGnTHbHJFl2'
tracks_df_1 = get_artist_tracks(token, artist_id_1, live_tracks=False)
# Keep only the rows whose artist_id is exactly this artist
tracks_df_1 = tracks_df_1.loc[tracks_df_1['artist_id'] == artist_id_1].reset_index(drop=True)
tracks_df_1.head()

0
Next request in ~ 4 seconds
Elapsed Time: 24.215622186660767 seconds
Getting next 0 tracks


Unnamed: 0,artist_name,artist_id,track_name,track_id
0,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Strawberry Fields Forever - 2015 Mix,4JaTNsbucUxF3FtKqt3IY3
1,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Penny Lane - 2017 Mix,5M8s9X2vuW1tv52uv4meCt
2,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Sgt. Pepper's Lonely Hearts Club Band - 2017 Mix,5GuGzaaKHoOR3amXgvT2Pq
3,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,With A Little Help From My Friends - 2017 Mix,0Q6YQlQHdFd6NfXjcHtijM
4,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Lucy In The Sky With Diamonds - 2017 Mix,4GX8I8c7gMZn7mZFM9QAs0


In [357]:
# album_info = get_artist_albums(token, artist_id_1, offset=0, limit=50)
# album_name_ls = [a['name'] for a in album_info]
# album_id_ls = [a['id'] for a in album_info]
# tracks = []
# tracks += [get_album_tracks(token, album_id, summarize=True) for album_id in album_id_ls]


In [6]:
# Audio features for every track of the first artist, reduced to the columns used downstream
audio_feats_1 = get_audio_features(token, tracks_df_1['track_id'].tolist())
audio_feats_df_1 = pd.DataFrame(audio_feats_1)
audio_feats_df_1 = audio_feats_df_1[[
    'tempo', 'valence', 'liveness', 'instrumentalness',
    'acousticness', 'speechiness', 'mode', 'loudness', 'key', 'energy',
    'danceability',
]]
audio_feats_df_1

50
request 1 - 50 tracks completed
Next request in ~ 2 seconds
Elapsed Time: 2.1941137313842773 seconds
100
request 2 - 100 tracks completed
Next request in ~ 4 seconds
Elapsed Time: 6.705664396286011 seconds
150
request 3 - 150 tracks completed
Next request in ~ 3 seconds
Elapsed Time: 9.795597076416016 seconds
200
request 4 - 200 tracks completed
Next request in ~ 4 seconds
Elapsed Time: 14.12145709991455 seconds
250
request 5 - 250 tracks completed
Next request in ~ 2 seconds
Elapsed Time: 16.255526542663574 seconds
300
request 6 - 300 tracks completed
Next request in ~ 5 seconds
Elapsed Time: 21.412887811660767 seconds
350
request 7 - 350 tracks completed
Next request in ~ 3 seconds
Elapsed Time: 25.39545440673828 seconds
400
request 8 - 400 tracks completed
Next request in ~ 1 seconds
Elapsed Time: 27.255733489990234 seconds
450
request 9 - 450 tracks completed
Next request in ~ 3 seconds
Elapsed Time: 30.987874507904053 seconds
500
request 10 - 500 tracks completed
Next request i

Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
0,96.705,0.220,0.0918,0.000636,0.26900,0.2920,1,-8.727,10,0.665,0.389
1,113.063,0.677,0.1230,0.003140,0.08980,0.0402,1,-8.369,9,0.506,0.631
2,95.369,0.472,0.8940,0.035400,0.03780,0.1130,1,-7.866,7,0.837,0.360
3,112.303,0.707,0.2560,0.000000,0.13000,0.0282,1,-7.900,4,0.595,0.678
4,187.405,0.497,0.1270,0.000000,0.13000,0.0442,1,-6.921,2,0.537,0.247
...,...,...,...,...,...,...,...,...,...,...,...
516,106.301,0.741,0.1480,0.000000,0.11300,0.0268,1,-10.061,2,0.608,0.382
517,137.735,0.697,0.1900,0.000000,0.11700,0.0298,0,-10.553,6,0.723,0.642
518,122.466,0.852,0.3230,0.000000,0.01570,0.0408,1,-8.636,2,0.569,0.753
519,176.548,0.654,0.1050,0.004490,0.17300,0.0351,1,-11.033,4,0.386,0.430


In [7]:
#Kraftwerk: https://open.spotify.com/artist/0dmPX6ovclgOy8WWJaFEUU?si=rNgVC61lRdKrfVz88pvSZw
#Orchestral Manoeuvres In The Dark - https://open.spotify.com/artist/7wJ9NwdRWtN92NunmXuwBk?si=DkuYZqpkRM2xB-X9Xl1L8g
#The Cure - https://open.spotify.com/artist/7bu3H8JO7d0UbMoVzbo70s?si=0wtCJ0DCQHKIe-Fb6zoNJg
#DM - https://open.spotify.com/artist/762310PdDnwsDxAQxzQkfX?si=rJ18xcQdS_2drHgcSukRDg
#Boy Harsher - https://open.spotify.com/artist/4iom7VVRU6AHRIu1JUXpLG?si=8mTRPDd2TryJi9XQ3MmDeA

#Pixies - https://open.spotify.com/artist/6zvul52xwTWzilBZl6BUbT?si=UQflLGjYTq2MeyKqB2IGrg
#Killers - https://open.spotify.com/artist/0C0XlULifJtAgn6ZNCW2eu?si=5k2jA9OQQLycx75Af14MeA
#2Door - https://open.spotify.com/artist/536BYVgOnRky0xjsPT96zl?si=It8JgQrzSoaj18F9nrvwWg

#Stick Figure - https://open.spotify.com/artist/5SXEylV07TC57eanSxxg4R?si=3YC-Jxk2TkK02b4kronYlg
#Slightly Stoopid - https://open.spotify.com/artist/6MxlVTY6PmY8Nyn16fvxtb?si=ozGq6h1_SJeaYJ7Rx2VeSQ
#Chemical Brothers - https://open.spotify.com/artist/1GhPHrq36VKCY3ucVaZCfo?si=ePxX0PI-QSyA_qvzf8JLUw

# Second artist to compare (the Chemical Brothers id from the list above)
artist_id_2 = '1GhPHrq36VKCY3ucVaZCfo'
tracks_df_2 = get_artist_tracks(token, artist_id_2, live_tracks=False)
# Keep only the rows whose artist_id is exactly this artist
tracks_df_2 = tracks_df_2.loc[tracks_df_2['artist_id'] == artist_id_2].reset_index(drop=True)
tracks_df_2.head()

0
Next request in ~ 2 seconds
Elapsed Time: 31.249794483184814 seconds
Getting next 20 tracks
50
Next request in ~ 4 seconds
Elapsed Time: 46.40679621696472 seconds
Getting next 35 tracks
100
Next request in ~ 2 seconds
Elapsed Time: 68.50265955924988 seconds
Getting next 0 tracks


Unnamed: 0,artist_name,artist_id,track_name,track_id
0,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Intro,6WGGymJloCXBL4BY4liTda
1,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Live Again (feat. Halo Maud),55djbD8EGJOd3akmGMXqw6
2,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,No Reason,05viyiChBLJMuVup0mtH2A
3,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Goodbye,5Eyhw61L8qs4bdc6G4cfW3
4,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Fountains,6VBFpHKhpBDfpjAcSOISoH


In [8]:
# Audio features for every track of the second artist, reduced to the columns used downstream
audio_feats_2 = get_audio_features(token, tracks_df_2['track_id'].tolist())
audio_feats_df_2 = pd.DataFrame(audio_feats_2)
audio_feats_df_2 = audio_feats_df_2[[
    'tempo', 'valence', 'liveness', 'instrumentalness',
    'acousticness', 'speechiness', 'mode', 'loudness', 'key', 'energy',
    'danceability',
]]
audio_feats_df_2

50
request 1 - 50 tracks completed
Next request in ~ 4 seconds
Elapsed Time: 4.717858791351318 seconds
100
request 2 - 100 tracks completed
Next request in ~ 1 seconds
Elapsed Time: 6.4284093379974365 seconds
150
request 3 - 150 tracks completed
Next request in ~ 2 seconds
Elapsed Time: 8.545787572860718 seconds
200
request 4 - 200 tracks completed
Next request in ~ 3 seconds
Elapsed Time: 11.71789264678955 seconds
250
request 5 - 250 tracks completed
Next request in ~ 5 seconds
Elapsed Time: 17.25292468070984 seconds
300
request 6 - 300 tracks completed
Next request in ~ 2 seconds
Elapsed Time: 19.745505809783936 seconds
350
request 7 - 350 tracks completed
Next request in ~ 4 seconds
Elapsed Time: 24.6468448638916 seconds
400
request 8 - 400 tracks completed
Next request in ~ 2 seconds
Elapsed Time: 27.67387056350708 seconds
410
request 9 - 410 tracks completed
Elapsed Time: 28.25812077522278 seconds


Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
0,126.973,0.354,0.0855,0.809,0.004840,0.0426,0,-12.578,5,0.580,0.686
1,127.003,0.527,0.0779,0.341,0.000875,0.0752,0,-5.440,6,0.897,0.550
2,128.039,0.495,0.1460,0.802,0.003040,0.0422,1,-7.179,2,0.866,0.663
3,129.941,0.152,0.0829,0.333,0.020200,0.0556,1,-5.714,1,0.894,0.539
4,107.020,0.509,0.2420,0.087,0.124000,0.0408,1,-7.215,0,0.710,0.769
...,...,...,...,...,...,...,...,...,...,...,...
405,120.021,0.230,0.7380,0.295,0.007060,0.0827,1,-5.897,7,0.918,0.675
406,127.001,0.363,0.1140,0.522,0.119000,0.0529,1,-4.976,2,0.921,0.632
407,127.001,0.363,0.1140,0.522,0.119000,0.0529,1,-4.976,2,0.921,0.632
408,124.964,0.735,0.4260,0.788,0.118000,0.0612,1,-10.995,7,0.939,0.614


In [9]:
# Drop tracks whose audio features contain any NaN, keeping the track table and
# the feature table row-aligned. Each mask is True for a complete row; the
# printed number is how many rows are dropped.
na_mask_1 = (audio_feats_df_1.isna().sum(1)==0).tolist()
print(len(audio_feats_df_1)-sum(na_mask_1))
if sum(na_mask_1) < len(audio_feats_df_1):
    tracks_df_1 = tracks_df_1[na_mask_1].reset_index(drop=True)
    audio_feats_df_1 = audio_feats_df_1[na_mask_1].reset_index(drop=True)

na_mask_2 = (audio_feats_df_2.isna().sum(1)==0).tolist()
print(len(audio_feats_df_2)-sum(na_mask_2))
if sum(na_mask_2) < len(audio_feats_df_2):
    tracks_df_2 = tracks_df_2[na_mask_2].reset_index(drop=True)
    # Filter the second feature table with its OWN mask; using the first
    # artist's mask here selects the wrong rows or fails on a length mismatch.
    audio_feats_df_2 = audio_feats_df_2[na_mask_2].reset_index(drop=True)

0
0


In [84]:
# Similarity matrices for the three tempo scalings returned by compare_discog
S_1, S_2, S_3 = compare_discog(
    audio_feats_df_1.copy(),
    audio_feats_df_2.copy(),
    include_categorical=True,
    circle_5=True,
)
# Element-wise best similarity across the three matrices
S = np.maximum(np.maximum(S_1, S_2), S_3)
# S = S_1

In [85]:
# Ten highest similarities per row, in descending order; column 0 is the row maximum
S_sub_sorted = np.flip(np.sort(S, axis=1), axis=1)[:, :10]
s_max = S_sub_sorted[:, 0]
S_sub_sorted

array([[0.91978526, 0.89874405, 0.89874405, ..., 0.88464917, 0.88457708,
        0.87986062],
       [0.9843199 , 0.9384464 , 0.9384464 , ..., 0.92670108, 0.92412321,
        0.92285859],
       [0.94972252, 0.94611138, 0.94192211, ..., 0.91477284, 0.91477013,
        0.89586502],
       ...,
       [0.95390781, 0.95035357, 0.94954626, ..., 0.93135687, 0.91899105,
        0.91812912],
       [0.91849443, 0.89896704, 0.89832762, ..., 0.88624942, 0.88461875,
        0.88149913],
       [0.94791639, 0.93895741, 0.93458209, ..., 0.93320478, 0.93320478,
        0.93320478]])

In [86]:
# Row indices of S ordered from highest to lowest best-match similarity
s_max_ix1 = np.flip(np.argsort(s_max))
s_max_ix1

array([101, 208, 233,  94, 105, 498, 410, 405, 331, 508, 340, 399, 444,
       294,  60, 313, 136, 517, 219, 189, 258, 145,   1,  61, 238, 376,
       460, 366, 174,  54, 293, 479,  67, 112, 306,  33,  19, 244, 375,
       264, 334,  71, 188,  86,  69,  50, 139, 150,  78, 413, 179,  96,
       476,  52, 372,  58, 445,  36, 457, 390, 401, 191, 403, 172, 271,
       256,  35,  51, 183, 434, 220, 286, 246,  93, 393,  12, 311, 474,
        84, 304, 198, 300, 449, 407, 298,  77, 117, 291, 324, 511, 400,
        55, 329, 104, 510, 384, 111, 338, 339, 180, 514, 327, 303, 302,
        16, 144, 147, 439,  26, 193,  70,  29, 394,  37,  32, 365, 102,
       441,  88,  64, 151, 459, 225, 337, 171, 391, 443, 380,  11, 251,
        31, 512, 482, 146,  20, 451,  40, 252, 471, 288, 442, 398, 392,
       250, 369, 492, 192,  82, 275,  13,  48,  63,  14, 356, 491, 507,
       389, 309, 182, 148, 266, 257, 461, 130, 211, 332,  62, 157, 402,
       470, 428,  57, 424, 464,  65, 173, 379, 249, 289, 255, 30

In [87]:
# Column indices of the ten highest similarities in every row of S
s_max_ix2 = np.flip(np.argsort(S, axis=1), axis=1)[:, :10]
# Top-5 column indices for the 40 rows with the highest best-match similarity
s_max_ix_2 = s_max_ix2[s_max_ix1[:40]][:, :5]
s_max_ix_2
# np.argsort(S[s_max_ix1[:5],:], axis=1)[:, ::-1][:,:10]

array([[395, 330,  36, 385, 205],
       [109, 181, 139, 119, 229],
       [350, 352, 351, 353, 223],
       [187, 317, 319, 148, 318],
       [227, 178, 341, 256, 294],
       [102, 326, 144, 124,   9],
       [156, 287, 283, 290, 176],
       [ 50, 304,  47,   8, 376],
       [305, 177, 153,   4, 173],
       [305, 177, 153,   4, 173],
       [144, 124, 326, 256, 102],
       [305, 177, 153, 173,   4],
       [156, 290, 287, 283, 176],
       [305, 153, 177, 173,   4],
       [102, 326, 144, 124,   9],
       [306, 136, 116, 367, 329],
       [305, 177, 153, 173,   4],
       [304,  50,  47,   8, 376],
       [156, 287, 290, 283, 176],
       [210, 127,  14, 357, 309],
       [305, 153, 173, 177,   4],
       [116, 136,   5, 313, 112],
       [156, 287, 283, 290, 176],
       [289, 286, 281, 186, 176],
       [305, 153, 173, 177,   4],
       [254, 348, 244, 251,   7],
       [144, 124, 326, 256,  70],
       [326, 144, 124, 256,  70],
       [156, 283, 290, 287, 286],
       [223, 3

In [88]:
# The 25 largest per-row maxima, highest first
np.flip(np.sort(s_max))[:25]


array([0.99299726, 0.99155249, 0.99142063, 0.99135886, 0.99118032,
       0.99074869, 0.9904682 , 0.99042703, 0.98942798, 0.98912912,
       0.98870864, 0.98765552, 0.98705571, 0.98675731, 0.98670599,
       0.9858651 , 0.98551555, 0.98551458, 0.98544207, 0.9851123 ,
       0.98471525, 0.98470804, 0.9843199 , 0.98424867, 0.98410548])

In [89]:
# Tracks of the first artist behind the 20 highest best-match similarities
tracks_df_1.iloc[s_max_ix1[:20]]

Unnamed: 0,artist_name,artist_id,track_name,track_id
101,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Polythene Pam - 2019 Mix,7BavH3tDEsEDKnntiVTaAu
208,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Birthday - Remastered 2009,1ABegtCPBMMJaMpfDyATjE
233,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Sgt. Pepper's Lonely Hearts Club Band - Repris...,42ocGQCOT0xYtV3f5kJDsD
94,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Octopus's Garden - 2019 Mix,3e1w0Wm0sH8nUYPArDkBG3
105,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,The End - 2019 Mix,481payDhvwK4SluEdw2o5v
498,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Get Back - Remastered 2009,3d5v5scfknhlp6oOC7BZBs
410,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Penny Lane - Remastered 2015,7afyyi8zn4FzWbcJ70F56V
405,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Day Tripper - Remastered 2015,29b2b96jozyD9GPCkOrVLs
331,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,A Hard Day's Night - Remastered 2009,5J2CHimS7dWYMImCHkEFaJ
508,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,A Hard Day's Night - Remastered 2009,1E4FcSqQs7U2RTebCNaYRH


In [90]:
# Second-artist tracks listed in row 2 of the top-match index table
tracks_df_2.iloc[s_max_ix_2[2]]

Unnamed: 0,artist_name,artist_id,track_name,track_id
350,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Out Of Control - Sasha Remix,6lxMmFyXLxgLbxNpJ5Lmt6
352,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Out Of Control - Sasha Club Mix,3j6BOTtzPBSkmr5dEknhk7
351,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Out Of Control - Sasha Remix / Radio Edit,0h19gBDFOK8lKmHSLAXgTK
353,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Out Of Control - Sasha Instrumental,7cZOkHru5oFXZUMByjwiS5
223,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Chico's Groove,7JzD3lTV3bWCceb9A1DroB


In [91]:
#https://open.spotify.com/track/5vtRk4rYxiy4cj95cmJ5Ma?si=52adadf2bcd74639
#https://open.spotify.com/track/2wnsBaxrmkthIFAm6vqCuX?si=72c49a5c59c44ea3
#https://open.spotify.com/track/00oZhqZIQfL9P5CjOP6JsO?si=2ca72b174a2b40cb

# Look up every second-artist row with this exact track name and show its audio features
track_inspect = 'Setting Sun'
print(tracks_df_2.loc[tracks_df_2['track_name'] == track_inspect])
track_ix = tracks_df_2.loc[tracks_df_2['track_name'] == track_inspect].index
# Index labels are used as positions here
audio_feats_df_2.iloc[track_ix]

               artist_name               artist_id   track_name  \
15   The Chemical Brothers  1GhPHrq36VKCY3ucVaZCfo  Setting Sun   
145  The Chemical Brothers  1GhPHrq36VKCY3ucVaZCfo  Setting Sun   
211  The Chemical Brothers  1GhPHrq36VKCY3ucVaZCfo  Setting Sun   
379  The Chemical Brothers  1GhPHrq36VKCY3ucVaZCfo  Setting Sun   

                   track_id  
15   6yw35eTaS04QFe5PSirX9Z  
145  4Yb8mY1J1nDwrhRlhCa9DS  
211  5JBGbFdBtLcCCygw8rtjli  
379  0Pi9gXX8JPKGD6kbUPYEPw  


Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
15,135.034,0.231,0.353,0.133,0.000665,0.212,1,-1.175,0,0.995,0.447
145,135.041,0.191,0.367,0.0543,0.000401,0.142,1,-1.594,0,0.988,0.412
211,135.041,0.248,0.354,0.13,0.000738,0.223,1,-1.169,0,0.995,0.446
379,135.041,0.248,0.354,0.13,0.000738,0.223,1,-1.169,0,0.995,0.446


In [92]:
# Find the first-artist row with this track id
tracks_df_1.loc[tracks_df_1['track_id'] == '00oZhqZIQfL9P5CjOP6JsO']

Unnamed: 0,artist_name,artist_id,track_name,track_id
288,The Beatles,3WrFJ7ztbogyGnTHbHJFl2,Tomorrow Never Knows - Remastered 2009,00oZhqZIQfL9P5CjOP6JsO


In [93]:
# Pick one first-artist track by name and rank the second artist's tracks against it
track_inspect = 'Tomorrow Never Knows - Remastered 2009'
print(tracks_df_1.loc[tracks_df_1['track_name'] == track_inspect])
# First matching row; raises IndexError when the name is not present
track_ix = tracks_df_1.loc[tracks_df_1['track_name'] == track_inspect].index[0]
# Columns of S with the 20 highest similarities for that row, highest first
check_ix = np.flip(np.argsort(S[track_ix]))[:20]
S[track_ix, check_ix]
# tracks_df_1[tracks_df_1.track_name==track_inspect]

     artist_name               artist_id  \
288  The Beatles  3WrFJ7ztbogyGnTHbHJFl2   

                                 track_name                track_id  
288  Tomorrow Never Knows - Remastered 2009  00oZhqZIQfL9P5CjOP6JsO  


array([0.96424582, 0.95699015, 0.95121566, 0.94360369, 0.93358236,
       0.93072345, 0.92823397, 0.92472016, 0.92472016, 0.92459084,
       0.92375737, 0.92293121, 0.91460783, 0.91460783, 0.91460783,
       0.91460783, 0.91460783, 0.90556114, 0.90508402, 0.90419266])

In [94]:
# Audio features of the inspected first-artist track, kept as a one-row frame
audio_feats_df_1.loc[[track_ix]]

Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
288,125.887,0.0534,0.342,0.00208,8.4e-05,0.0405,1,-6.105,0,0.829,0.38


In [95]:
# Second-artist tracks at the ranked match positions
tracks_df_2.iloc[check_ix]

Unnamed: 0,artist_name,artist_id,track_name,track_id
145,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Setting Sun,4Yb8mY1J1nDwrhRlhCa9DS
191,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,The Test,0VfSNKmz24yn5idNxrvV10
55,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,MAH,6Qk5eA59RBcO1bBkq9uSZM
228,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Hey Boy Hey Girl - ARTBAT Remix,6eCGHBR1irzeSUdOLuqV6J
254,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Skipping Like A Stone (feat. Beck) - Single Edit,0yDcwwXLe0mTV2U5M4t0C4
15,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Setting Sun,6yw35eTaS04QFe5PSirX9Z
348,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Out Of Control - Radio Edit,2QDnIFKbJZOK3LK2KqykM4
379,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Setting Sun,0Pi9gXX8JPKGD6kbUPYEPw
211,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Setting Sun,5JBGbFdBtLcCCygw8rtjli
125,The Chemical Brothers,1GhPHrq36VKCY3ucVaZCfo,Setting Sun - 2003 Digital Remaster,3HJy3DZZ6wAYQZ4SDK1YKQ


In [96]:
# Audio features of the second-artist tracks at the ranked match positions
audio_feats_df_2.iloc[check_ix]

Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
145,135.041,0.191,0.367,0.0543,0.000401,0.142,1,-1.594,0,0.988,0.412
191,117.412,0.294,0.166,0.355,0.00621,0.053,1,-3.965,0,0.879,0.297
55,129.947,0.212,0.346,0.238,0.0296,0.0617,1,-6.083,0,0.927,0.688
228,125.974,0.254,0.12,0.372,0.000777,0.0706,1,-3.563,0,0.965,0.546
254,127.983,0.181,0.362,0.00423,0.0881,0.0512,1,-6.733,5,0.78,0.452
15,135.034,0.231,0.353,0.133,0.000665,0.212,1,-1.175,0,0.995,0.447
348,131.99,0.337,0.309,0.0236,0.00219,0.035,0,-3.594,5,0.943,0.528
379,135.041,0.248,0.354,0.13,0.000738,0.223,1,-1.169,0,0.995,0.446
211,135.041,0.248,0.354,0.13,0.000738,0.223,1,-1.169,0,0.995,0.446
125,135.041,0.191,0.367,0.0543,0.000401,0.142,1,-1.594,7,0.988,0.412


In [343]:
# Keys 11, 6 and 1 sit next to each other in CIRCLE_OF_FIFTHS = [0, 7, 2, 9, 4, 11, 6, 1, 8, 3, 10, 5]
audio_feats_df_2.loc[audio_feats_df_2['key'].isin([11, 6, 1])].sort_values(by=['key', 'tempo'])

Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
293,72.378,0.0855,0.263,0.00053,0.00719,0.118,1,-5.983,1,0.844,0.335
119,77.495,0.0817,0.928,0.00262,0.638,0.0537,1,-13.518,1,0.495,0.294
252,103.284,0.033,0.804,0.936,0.845,0.219,1,-24.71,1,0.551,0.232
136,119.967,0.582,0.893,0.0239,0.000252,0.0362,0,-8.981,1,0.826,0.527
64,144.499,0.623,0.952,0.000801,0.123,0.0312,1,-8.788,1,0.7,0.412
87,151.974,0.336,0.939,0.000361,0.00621,0.0517,0,-8.531,1,0.884,0.525
14,159.05,0.378,0.157,0.0,0.371,0.166,1,-7.165,1,0.773,0.253
142,160.131,0.543,0.981,0.00864,0.0209,0.105,1,-9.852,1,0.83,0.633
58,160.177,0.684,0.958,0.35,0.00289,0.0649,1,-9.814,1,0.897,0.538
127,178.082,0.363,0.909,0.00412,0.000279,0.176,0,-9.186,1,0.909,0.411


In [345]:
# Second-artist tracks whose key is 11, 6 or 1 (mask taken from the feature table)
# NOTE(review): the saved output lists a different artist than the one loaded
# into tracks_df_2 earlier in this notebook - it looks stale; re-run to confirm.
tracks_df_2.loc[audio_feats_df_2['key'].isin([11, 6, 1])]

Unnamed: 0,artist_name,artist_id,track_name,track_id
1,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Hang On To Yourself - Demo,5vv4Fn4G4fc1uuaDg3lhyc
14,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Ziggy Stardust - Sounds Of The 70s: John Peel ...,3dc9UYPzC1wsWGx7VGKSmI
32,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,All The Young Dudes (Live) [Stereo] - Live; St...,2lS36OEks0Zc7vdt7lk5ej
33,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Oh! You Pretty Things (Live) [Stereo] - Live; ...,3RWc7T8yFuEnTPAGrg7wZ1
35,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Moonage Daydream (Live) [Stereo] - Live; Stereo,2o98fGp4t5bNIS6ky0xa6u
37,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Warszawa - Live Moonage Daydream Edit,4xScGPuPY1KXTUtofpmeZg
42,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Subterraneans - 2017 Remaster,7sPFp70VxgvrZVorzVBtSD
48,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,"Thursday's Child - Live at the Kit Kat Klub, N...",3b535uOV5AVOtDdAICJuFz
58,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,I’m Afraid Of Americans - Live at the Kit Kat ...,5DdO7Bg3KB5KyzXJTv2XR7
60,David Bowie,0oSGxfWSnnOXhD2fKuz2Gy,Thursday's Child - Live at the Elysée Montmart...,5KEdsAcV9pm7AK8v4VxAeC


In [454]:
# Only the last expression of a cell is displayed, so the first lookup is
# evaluated but its result is not shown.
tracks_df_1.loc[tracks_df_1['track_name'] == 'Neighborhood #1 (Tunnels)']
audio_feats_df_1.loc[tracks_df_1['track_name'] == 'Neighborhood #1 (Tunnels)']

Unnamed: 0,tempo,valence,liveness,instrumentalness,acousticness,speechiness,mode,loudness,key,energy,danceability
112,126.914,0.211,0.29,0.0142,0.0647,0.0324,1.0,-5.566,0.0,0.738,0.282


In [459]:
# The name search below is evaluated but not displayed; only the final count is shown.
tracks_df_2.loc[tracks_df_2['track_name'].str.contains('Hero')]
# audio_feats_df_2[tracks_df_2.track_name=='Neighborhood #1 (Tunnels)']
# Number of second-artist rows carrying this exact track id
(tracks_df_2['track_id'] == '7Jh1bpe76CNTCgdgAdBw4Z').sum()

0