# Run this cell to set up Spotify account permissions

In [4]:
from lib.audio_methods import *
import pandas as pd
import time
import spotipy
from sklearn.cluster import KMeans

# Load the normalized feature table: the first ten CSV columns are read and
# the first of them becomes the index.
normalized_feature_stats = pd.read_csv(
    "./data_frames/normalized_feature_stats.csv",
    usecols=range(10),
    index_col=0,
)

# Acting on behalf of a user needs the OAuth flow, requested here with the
# 'playlist-modify-public' scope.
scope = 'playlist-modify-public'
spuser = spotipy.Spotify(
    auth_manager=spotipy.SpotifyOAuth(scope=scope),
)
# Requesting the current user's profile sets up the authorization; in a
# notebook the returned profile is shown as the cell output.
spuser.current_user()

{'display_name': 'wakizashi101',
 'external_urls': {'spotify': 'https://open.spotify.com/user/wakizashi101'},
 'followers': {'href': None, 'total': 10},
 'href': 'https://api.spotify.com/v1/users/wakizashi101',
 'id': 'wakizashi101',
 'images': [],
 'type': 'user',
 'uri': 'spotify:user:wakizashi101'}

# Run this cell to fit the KMeans model. It can easily be swapped out for a different model.

In [5]:
# K-means with K=400 clusters and a fixed seed, fitted on the normalized
# feature table.
model = KMeans(n_clusters=400, random_state=5523)
model.fit(normalized_feature_stats)
# Alternative input:
# model = KMeans(n_clusters=400,random_state=5523).fit(train_pca)
# After fitting, model.labels_ holds the cluster id of each row (track) of
# the table the model was fitted on.



# Edit the function call at the bottom of the cell below to upload recommendations based on a playlist of your choice. Increase the delay if you run into 429 (rate limit) errors.

In [6]:
def recommend_from_playlist(playlist_uri, pretrained_classifier, auth, name, popularity='least', delay=5):
    """
    Upload a playlist of recommendations derived from an existing playlist.

    Every track of the input playlist is assigned to a cluster by
    ``pretrained_classifier``. The cluster that occurs most often is selected,
    and the tracks of ``normalized_feature_stats`` that carry that cluster
    label are sorted by popularity, stripped of tracks already present in the
    input playlist, and handed to ``uploadRecommendationPlaylist``.

    Inputs:
    - playlist_uri, the uri of a playlist. Right click a playlist, then share,
      then copy spotify uri and that is your input
    - pretrained_classifier, a fitted model exposing ``labels_`` (one label per
      row of the module-level ``normalized_feature_stats``) and ``predict``
    - auth, passed through to ``getSongsFromPlaylist`` and
      ``uploadRecommendationPlaylist``
    - name, passed through to ``uploadRecommendationPlaylist`` (presumably the
      name of the created playlist -- confirm in lib.audio_methods)
    - popularity='least', 'least' sorts the recommendations by ascending
      popularity; any other value sorts them in descending order
    - delay=5, seconds to sleep before each batched API request; also passed
      to ``uploadRecommendationPlaylist``

    Returns:
    - 'Should be uploaded' once the upload call has returned, or None when
      the function stops early (nothing to analyse or nothing to recommend).
    """
    from collections import Counter

    all_cluster_family = pd.DataFrame({
        'track': normalized_feature_stats.index,
        'cluster_id': pretrained_classifier.labels_,
    })
    songs = getSongsFromPlaylist(playlist_uri, auth)

    if not songs:
        print('No songs found in this playlist, dont confuse playlist with album')
        return
    print('gathered',len(songs),'songs','\nnow fetching feature data. This may take a moment.')

    # Entries without a track object or without a uri cannot be looked up.
    song_uris = [
        song['track']['uri']
        for song in songs
        if song['track'] is not None and song['track']['uri'] is not None
    ]

    # Fetch audio features in batches of 100 uris.
    feature_data_list = []
    for start in range(0, len(song_uris), 100):
        time.sleep(delay)  # dont overwhelm api rate limit
        feature_data_list += getAudioFeaturesChunked(song_uris[start:start + 100])

    # Drop tracks for which no features came back. Filtering into a new list
    # avoids the index shifting that popping by position would cause.
    feature_data_list = [features for features in feature_data_list if features is not None]
    if not feature_data_list:
        print('No audio features found for the songs in this playlist')
        return

    # Format features into df to prep for model prediction
    feature_columns = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
                       'instrumentalness', 'liveness', 'valence', 'tempo']
    base_track_features_df = pd.DataFrame.from_records(feature_data_list)[feature_columns]

    # Normalize with the stored per-feature mean and std. read_csv's
    # ``squeeze`` keyword no longer exists in pandas 2.x, so the one-column
    # frame is squeezed into a Series explicitly.
    mean_stats = pd.read_csv("./data_frames/normalized_features_mean.csv",
                             header=0, index_col=0).squeeze("columns")
    std_stats = pd.read_csv("./data_frames/normalized_features_std.csv",
                            header=0, index_col=0).squeeze("columns")
    normalized_base_track = (base_track_features_df - mean_stats) / std_stats

    # Most common cluster among the playlist's tracks; on a tie the cluster
    # that was encountered first wins.
    cluster_counts = Counter(pretrained_classifier.predict(normalized_base_track))
    base_track_cluster = max(cluster_counts, key=cluster_counts.get)

    print(base_track_cluster)
    recommends = list(all_cluster_family[all_cluster_family.cluster_id == base_track_cluster].track) # does it work for other models?
    print('found', len(recommends),'songs in cluster')
    print('sorting by popularity')

    # Fetch basic track info in batches of 50.
    recc_song_info = []
    for start in range(0, len(recommends), 50):
        time.sleep(delay)  # dont overwhelm api rate limit
        recc_song_info += getTrackBasicInfoChunked(recommends[start:start + 50])

    reverse = popularity != 'least'  # direction of sorting
    sorted_recc = sorted(recc_song_info, key=lambda info: info['popularity'], reverse=reverse)
    print(len(sorted_recc))

    print('removing songs that you already know')
    known_uris = set(song_uris)
    new_playlist_songs = [info['uri'] for info in sorted_recc if info['uri'] not in known_uris]

    if not new_playlist_songs:
        print('you already have all of the music we were gonna recommend') # should not feasibly happen on larger datasets
        return
    print('creating playlist')
    uploadRecommendationPlaylist(name, new_playlist_songs, delay, auth)
    return 'Should be uploaded'

# Example run: recommend from one playlist using the fitted model and the
# authorized client. delay=0 means no pause between batched API requests.
recommend_from_playlist(
    playlist_uri='spotify:playlist:1TfVViL8gm1bIGqQibq0Ys',
    pretrained_classifier=model,
    auth=spuser,
    name='kmeans clustering',
    delay=0,
)

gathered 143 songs 
now fetching feature data. This may take a moment.
280
found 87 songs in cluster
sorting by popularity
87
removing songs that you already know
creating playlist


'Should be uploaded'