In [1]:
from sklearn.neighbors import NearestNeighbors, NeighborhoodComponentsAnalysis
import pandas as pd
import numpy as np
import pickle

In [2]:
def load_clean_data(path="Data/spotify2019.csv"):
    """
    Loads data and performs basic data cleaning and normalization.
    Returns one raw-data DataFrame, and one model-ready DataFrame.

    Parameters
    ----------
    path: str, default "Data/spotify2019.csv"
        Location of the CSV file, passed straight to pandas.read_csv.

    Returns
    -------
    data: pandas.DataFrame
        The CSV rows with repeated track_id values removed (the first
        occurrence is kept). Row labels are not renumbered, so the index
        may contain gaps.
    df: pandas.DataFrame
        `data` without 'artist_name' and 'track_name', with every numeric
        feature column divided by its largest absolute value so that the
        values fall within [-1, 1].

    Example
    -------
    > raw, clean = load_clean_data()
    > type(raw), raw.shape, type(clean), clean.shape
      (pandas.core.frame.DataFrame,
      (130326, 17),
      pandas.core.frame.DataFrame,
      (130326, 15))
    """
    data = pd.read_csv(path)
    data = data.drop_duplicates(subset='track_id', keep='first')

    # drop() returns a new frame, so scaling below does not touch `data`.
    df = data.drop(columns=['artist_name', 'track_name'])

    num_cols = ['acousticness', 'danceability', 'duration_ms', 'energy',
                'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
                'speechiness', 'tempo', 'time_signature', 'valence', 'popularity']

    for col in num_cols:
        peak = df[col].abs().max()
        # A column that is entirely zero would turn into NaN (0 / 0);
        # dividing by 1 instead leaves it as zeros.
        df[col] = df[col] / (peak if peak != 0 else 1)

    return data, df

def suggest(song_id=None, n_suggestions=1, output_format='records'):
    """
    Suggests Spotify song(s) given one song id.

    Reads the module-level frames `data` and `df` returned by
    load_clean_data(), so that call must have run before this one.

    Parameters
    ----------
    song_id: str
        Song id from which to base suggestions

    n_suggestions: int {1, 3, 5, 10, 15, 20}, default 1
        Number of songs to suggest. Selects which saved model file
        './model/model_{n_suggestions}_suggestions.sav' is loaded.

    output_format: str, default 'records'
        Output format of the JSON string:
            'split' : dict like {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
            'records' : list like [{column -> value}, ... , {column -> value}]
            'index' : dict like {index -> {column -> value}}
            'columns' : dict like {column -> {index -> value}}
            'values' : just the values array
            'table' : dict like {'schema': {schema}, 'data': {data}}
        Note: Directly passed to pandas.DataFrame.to_json(orient=output_format)

    Returns
    -------
    str
        JSON string of the suggested rows of `data`, without the numeric
        feature columns.

    Raises
    ------
    ValueError
        If song_id does not appear in df.track_id.
    FileNotFoundError
        If there is no saved model file for the requested n_suggestions.

    Example
    -------
    > example = data[data.track_id == '6Wosx2euFPMT14UXiWudMy']
    > example.artist_name
      R3HAB
    > example.track_name
      Radio Silence
    > suggestion = suggest('6Wosx2euFPMT14UXiWudMy')
    > suggestion
      '[{"artist_name":"Anderson .Paak",
         "track_id":"7GvKDZP30uHX0p8y3Kn6BM",
         "track_name":"Brother\'s Keeper (feat. Pusha T)"}]'
    """
    matches = df[df.track_id == song_id]
    if matches.empty:
        # Fail early with a clear message instead of handing the model an
        # empty query array.
        raise ValueError(f"Unknown song_id: {song_id!r}")
    song = np.array(matches.drop(columns='track_id'))

    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that come from a trusted source.
    with open(f'./model/model_{n_suggestions}_suggestions.sav', 'rb') as model_file:
        loaded_model = pickle.load(model_file)

    # kneighbors(...)[1][0] is taken as the neighbor positions for the single
    # query row (assumes a scikit-learn NearestNeighbors-style model - TODO
    # confirm). The first entry is skipped, presumably because it is the
    # query song itself.
    neighbor_rows = loaded_model.kneighbors(song)[1][0][1:]
    output = data.iloc[neighbor_rows]

    drop_cols = ['acousticness', 'danceability', 'duration_ms', 'energy',
                 'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
                 'speechiness', 'tempo', 'time_signature', 'valence', 'popularity']

    suggestion = output.drop(columns=drop_cols).to_json(orient=output_format)

    return suggestion

In [3]:
# Build the module-level frames that suggest() reads as globals:
# `data` (de-duplicated raw rows) and `df` (normalized feature columns).
data, df = load_clean_data()

In [4]:
# Request three suggestions based on one example track id.
suggest(song_id='6Wosx2euFPMT14UXiWudMy', n_suggestions=3)

'[{"artist_name":"Anderson .Paak","track_id":"7GvKDZP30uHX0p8y3Kn6BM","track_name":"Brother\'s Keeper (feat. Pusha T)"},{"artist_name":"Luke Combs","track_id":"72DdM3q1npvvRUoa1YQEOd","track_name":"Honky Tonk Highway"},{"artist_name":"First Aid Kit","track_id":"460VbsObk5l2FGYxUDtaA6","track_name":"Rebel Heart"}]'