# Load Credentials
Load the Spotify API credentials from `id.json` into the `SPOTIPY_*` environment variables.

In [None]:
import json
import os

# Read the Spotify app credentials from id.json. The file is decoded as UTF-8
# explicitly so parsing does not depend on the platform's locale encoding.
with open('id.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Export the settings as SPOTIPY_* environment variables; the SpotifyOAuth
# object created later is given no explicit credentials, so it presumably
# picks them up from here.
os.environ["SPOTIPY_CLIENT_ID"] = data["client_id"]
os.environ["SPOTIPY_CLIENT_SECRET"] = data["client_secret"]
os.environ["SPOTIPY_REDIRECT_URI"] = r'http://localhost:8080'

# Get all liked songs

In [None]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Authenticate with the "user-library-read" scope.
scope = "user-library-read"
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

results = []

offset = 0
page_size = 50  # number of saved tracks requested per call
# A one-item request is enough to read the library's total track count.
num_songs_total = sp.current_user_saved_tracks(limit=1)['total']

# The saved tracks come back in pages; walk through them page by page.
while offset < num_songs_total:
    print(f"Processing batch: {offset}/{num_songs_total}", end='\r')
    curr_batch = sp.current_user_saved_tracks(limit=page_size, offset=offset)['items']
    for item in curr_batch:
        res = item['track']
        # Drop the market lists, which the later cells never read. pop() with
        # a default tolerates responses where the key is absent instead of
        # raising KeyError like `del` would.
        res.pop('available_markets', None)
        res['album'].pop('available_markets', None)
        results.append(res)

    offset += page_size

print("\nFinished loading liked songs")

# Get audio features for all liked songs

In [None]:
batch_size = 100
# Response fields that are discarded before the features are stored per song.
keys_i_hate = ['id', 'type', 'uri', 'track_href', 'analysis_url', 'duration_ms', 'time_signature']

offset = 0
while offset < num_songs_total:
    print(f"Processing batch: {offset}/{num_songs_total}", end='\r')

    # The final batch may hold fewer than batch_size songs.
    curr_size = min(batch_size, num_songs_total - offset)

    # Gather the URIs of the songs in this batch and query their features.
    uri_list = [results[offset + i]["uri"] for i in range(curr_size)]
    curr_batch = sp.audio_features(uri_list)

    # Attach each stripped feature dict to the matching song in results.
    # NOTE(review): this assumes every entry of curr_batch is a dict — confirm
    # the API never returns a missing entry for a track.
    for i in range(curr_size):
        features = curr_batch[i]
        for key in keys_i_hate:
            del features[key]
        results[offset + i]["audio_features"] = features

    # Advance to the next batch.
    offset += batch_size

# Group Liked Songs using metadata

## Convert audio feature dict into numpy array for KNN

In [None]:
import numpy as np

# Column order of the feature matrix, taken from the first song's feature dict.
audio_features = list(results[0]['audio_features'].keys())

# One row per liked song, one column per audio feature.
audio_features_arr = np.zeros((num_songs_total, len(audio_features)))

for i in range(num_songs_total):
    song_features = results[i]['audio_features']
    for j, audio_feature in enumerate(audio_features):
        # Look up the single feature name; indexing the dict with the whole
        # audio_features list raises TypeError because lists are unhashable.
        audio_features_arr[i, j] = song_features[audio_feature]

## Create KNN (standardize the features and apply PCA first)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Standardize the feature columns; fit() returns the fitted scaler itself.
sc = StandardScaler().fit(audio_features_arr)
std_data = sc.transform(audio_features_arr)

# Fit a PCA with num_comp components on the standardized data and project it.
num_comp = 2
pca = PCA(n_components=num_comp).fit(std_data)
pca_result = pca.transform(std_data)


In [None]:
from sklearn.manifold import TSNE

# Bind the estimator to its own name so the imported TSNE class is not
# overwritten by an instance (calling TSNE(...) again would otherwise fail,
# because an instance is not callable).
# NOTE(review): newer scikit-learn releases rename n_iter to max_iter —
# confirm against the installed version.
tsne = TSNE(n_components=2, perplexity=50, n_iter=5000, learning_rate=200)

# Two-dimensional embedding of the standardized audio features.
tsne_results = tsne.fit_transform(std_data)

In [None]:
# NOTE(review): plt and sns are not imported in the visible cells — presumably
# matplotlib.pyplot and seaborn; confirm they are imported before this runs.

# The embedding coordinates are identical for every plot, so build the frame
# once instead of on every iteration.
df_tsne = pd.DataFrame(tsne_results, columns=['t-sne-one', 't-sne-two'])

# One scatter plot per audio feature, with points colored by that feature.
for i, feature_name in enumerate(audio_features):
    # Overwrite the hue column with the current feature's values.
    df_tsne['label'] = audio_features_arr[:, i]
    plt.figure(figsize=(10, 10))
    sns.scatterplot(
        x="t-sne-one", y="t-sne-two",
        hue="label",
        data=df_tsne,
        alpha=0.5
    )
    plt.title(feature_name)
    plt.show()