# Load Credentials
Load the Spotify client credentials from `id.json` into environment variables.

# Get all liked songs

In [None]:
import spotipy
import spotify_moods.auth as auth
import spotify_moods.spotify_moods as moods
import spotipy.util as util

In [None]:
# OAuth scopes requested for this session: read the saved-tracks library and
# create/modify playlists. 'playlist-modify-public' is requested as well
# because this notebook creates its playlist with public=True, which the
# private-playlist scope alone does not authorize.
scope = 'user-library-read playlist-modify-private playlist-modify-public'
username = 'alecxu'

# Load the client credentials from id.json, then authenticate as `username`.
auth.export_client_id('id.json')
sp = auth.user_auth(username, scope)

# Liked songs; later cells read each entry's "name" and "uri" fields.
results = moods.get_all_songs(sp)

In [None]:
# Map each song title to its position in `results`. When two songs share a
# title, the later position overwrites the earlier one.
song_name_dict = {}
for position, track in enumerate(results):
    song_name_dict[track['name']] = position

# Get audio features for all liked songs

In [None]:
# Library size as reported by Spotify; a one-item page is requested because
# only the 'total' field of the response is read.
saved_tracks_page = sp.current_user_saved_tracks(limit=1)
num_songs_total = saved_tracks_page['total']

# URIs of the liked songs, in the same order as `results`.
uri_list = [track["uri"] for track in results]

audio_features = moods.get_audio_features(sp, uri_list)

# Group Liked Songs using metadata

# Extract "Useful" features for song recommendations

*Useful features queried from spotify*
1. 2. Danceability/Energy: The gradient goes from more to less quite reliably; the two seem highly correlated.
3. Mode: Major/minor, very well clustered. COULD be useful or not; should try it out before drawing a conclusion.
4. Speechiness: All speechy songs are in one place, so this can probably be used.
5. 6. Acousticness/Instrumentalness: Instrumentalness seems to be almost strictly a superset of acousticness; both are well clustered.
7. Liveness: Seems well clustered.
8. Valence: "Happiness". Seems very important, although it is not the best clustered (local clusters only).

### Convert audio feature dict into numpy array for processing

In [None]:
import numpy as np
# Audio features kept for clustering; the order here fixes the column order
# of the matrix built below.
af_used = ['danceability', 'energy', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence']

# Build the (songs x features) float matrix directly from the fetched
# feature dicts, one row per entry of `audio_features` in its own order.
# Sizing the matrix from the data itself (instead of a separately fetched
# library total) keeps the two from disagreeing. The reshape keeps the
# result two-dimensional even when no features were fetched.
audio_features_arr = np.array(
    [[track_features[feature_name] for feature_name in af_used]
     for track_features in audio_features],
    dtype=float,
).reshape(-1, len(af_used))

## Standardize Data

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Fit the scaler on the raw feature matrix, then rescale that same matrix.
sc = StandardScaler().fit(audio_features_arr)
std_data = sc.transform(audio_features_arr)

# Fit a PCA with `num_comp` components on the standardized data and project
# the data onto those components.
num_comp = 2
pca = PCA(n_components=num_comp).fit(std_data)
pca_result = pca.transform(std_data)


In [None]:
from matplotlib import pyplot as plt
import seaborn as sns
def graph_embeddings(results: np.ndarray, names: list, figsize=(5,5), features=None):
    """Draw one scatter plot of a 2-D embedding per feature name.

    Every plot shows the same points (the two columns of ``results``) and
    colours them by one feature column, so the plots can be compared to see
    which features vary smoothly across the embedding.

    Args:
        results: Array with exactly two columns, one row per song.
        names: Feature names, used as plot titles. The i-th name is paired
            with column i of ``features``.
        figsize: Size passed to ``plt.figure`` for every plot.
        features: 2-D array whose columns provide the colour values, with
            rows in the same order as ``results``. Defaults to the
            module-level ``audio_features_arr`` when omitted.
    """
    if features is None:
        # Backward-compatible default: fall back to the notebook's global
        # feature matrix, which is what this function always read before.
        features = audio_features_arr

    df_data = pd.DataFrame(results, columns=['data-one', 'data-two'])

    for column, name in enumerate(names):
        # The 'label' column is overwritten on each pass so a single frame
        # can be reused for every plot.
        df_data['label'] = features[:, column]
        plt.figure(figsize=figsize)
        sns.scatterplot(
            x="data-one", y="data-two",
            hue="label",
            data=df_data,
            alpha=0.5,
        )
        plt.title(name)
        plt.show()

In [None]:
from sklearn.manifold import TSNE
import warnings
# NOTE: this suppresses every warning from here on, not only t-SNE's.
warnings.filterwarnings('ignore')

# The fitted model gets its own name so the imported TSNE class is not
# overwritten by an instance; otherwise re-running this cell would fail
# because the instance is not callable.
# NOTE(review): newer scikit-learn releases appear to rename `n_iter` to
# `max_iter` - confirm against the installed version.
tsne_model = TSNE(n_components=2, perplexity=50, n_iter=5000, learning_rate=200)
tsne_results = tsne_model.fit_transform(std_data)

In [None]:
# Plot the t-SNE embedding once per feature, coloured by that feature.
graph_embeddings(results=tsne_results, names=af_used)

In [None]:
import umap
# Embed the standardized features with a UMAP model left at its defaults.
umap_reducer = umap.UMAP()
umap_results = umap_reducer.fit_transform(std_data)

In [None]:
# Plot the UMAP embedding once per feature, coloured by that feature.
graph_embeddings(results=umap_results, names=af_used)

In [None]:
# Seed song: find its row in the UMAP embedding by title.
song_name = "Hope"
song_index = song_name_dict[song_name]
song_coords = umap_results[song_index]

# The helper's return values are used below as indices into `results`.
# NOTE(review): 30 is presumably the number of neighbours to return -
# confirm against moods.return_similar_songs.
sim_songs = moods.return_similar_songs(umap_results, song_coords, 30)

# One-column table of recommended titles, built directly from the dict
# rather than through an index-oriented frame that is then transposed.
song_names = {'recommendations': [results[res]["name"] for res in sim_songs]}
song_names_df = pd.DataFrame(song_names)
song_names_df

In [None]:
# URIs of the recommended songs, in recommendation order.
song_ids = [results[res]["uri"] for res in sim_songs]

playlist_name = f"songs similar to {song_name}"

# Resolve the authenticated account once and use it for both calls, so the
# playlist is created and filled under the same user id rather than mixing
# the API-reported id with the hard-coded username.
user_id = sp.current_user()['id']

# NOTE(review): per Spotify's documentation a public playlist needs the
# 'playlist-modify-public' scope - confirm the scope requested at
# authentication covers it.
playlist = sp.user_playlist_create(
    user_id,
    playlist_name,
    public=True,
    collaborative=False,
    description="testing create playlist function",
)
playlist_results = sp.user_playlist_add_tracks(user_id, playlist['id'], song_ids)