# Jon | Allie Spotify Recommender
_Author_: JAEFinger


## Plan
The overall plan is to:
1. Run lookup_song() - finds which song data to use based on a track_id received in a JSON payload
2. Run create_predicts() - preprocesses the selected song and finds the row indexes of its k nearest neighbors
3. Run deliver() - takes the k row indexes from create_predicts() and finds the associated artist and track names; send_json() then turns them into a JSON payload to send off.

## Imports (Packages and DF)

In [112]:
import sklearn
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import joblib
import json
import requests

In [8]:
# Read the full song catalogue from CSV into a DataFrame.
songs = pd.read_csv("song_list.csv")
# The 14 feature columns, i.e. every column except the descriptive
# artist_name, track_id and track_name. In this cell the list is only used
# for the summary message printed below.
features = ['acousticness', 'danceability',
       'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',
       'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',
       'popularity']
print(f'This dataframe looks like {songs.shape} and has features of: {features}')
# Bare expression: the notebook renders the first five rows as the cell output.
songs.head()

This dataframe looks like (130663, 17) and has features of: ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence', 'popularity']


Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
0,YG,2RM4jf1Xa9zPgMGRDiht8O,"Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj",0.00582,0.743,238373,0.339,0.0,1,0.0812,-7.678,1,0.409,203.927,4,0.118,15
1,YG,1tHDG53xJNGsItRA3vfVgs,BAND DRUM (feat. A$AP Rocky),0.0244,0.846,214800,0.557,0.0,8,0.286,-7.259,1,0.457,159.009,4,0.371,0
2,R3HAB,6Wosx2euFPMT14UXiWudMy,Radio Silence,0.025,0.603,138913,0.723,0.0,9,0.0824,-5.89,0,0.0454,114.966,4,0.382,56
3,Chris Cooq,3J2Jpw61sO7l6Hc7qdYV91,Lactose,0.0294,0.8,125381,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,4,0.641,0
4,Chris Cooq,2jbYvQCyPgX3CdmAzeVeuS,Same - Original mix,3.5e-05,0.783,124016,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,4,0.928,0


In [9]:
# Count missing values per column (author's note: there are no NAs).
# NOTE: a notebook cell only displays its last expression, so the result of
# isna().sum() is computed but not shown; the cell output is songs.columns.
songs.isna().sum()
songs.columns

Index(['artist_name', 'track_id', 'track_name', 'acousticness', 'danceability',
       'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness',
       'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence',
       'popularity'],
      dtype='object')

## Preprocess and Fit

In [10]:
# Work on a copy so the original `songs` frame keeps its descriptive columns.
df = songs.copy()
# Drop the descriptive columns, leaving only the feature columns.
df = df.drop(columns=['artist_name', 'track_id', 'track_name'])
# Bare expression: the remaining column names are shown as the cell output.
df.columns

Index(['acousticness', 'danceability', 'duration_ms', 'energy',
       'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
       'speechiness', 'tempo', 'time_signature', 'valence', 'popularity'],
      dtype='object')

In [11]:
# steps = [('scaler', StandardScaler()), ('knn', NearestNeighbors(n_neighbors=4, algorithm='auto'))]
# pipeline = Pipeline(steps)
# trans_df = pipeline.fit(df)

## Testing a prediction

In [12]:
# Draw one random row from the feature-only frame to use as a test query.
test1 = df.sample(1)
print(test1)
# test1 = trans_df[0].transform(test1)
# rec_songs = trans_df[1].kneighbors(test1)
# rec_songs[1][0]

       acousticness  danceability  duration_ms  energy  instrumentalness  key  \
12437        0.0156         0.588       330896   0.759          0.000772   11   

       liveness  loudness  mode  speechiness    tempo  time_signature  \
12437     0.125    -6.587     0       0.0506  134.129               4   

       valence  popularity  
12437    0.115          45  


In [13]:
# print(songs.iloc[101179])
# print(songs.iloc[99694])

## Pickling

In [14]:
# # Create joblib
# filename = 'knn_pipeline.joblib'
# joblib.dump(pipeline, filename)

In [15]:
# # Load the picked df for testing
# # load the df from disk
# filename = 'knn_pipeline.joblib'
# pickled_knn = joblib.load(filename)

In [16]:
# test2 = pickled_knn[0].transform(test1)
# rec_songs1 = pickled_knn[1].kneighbors(test1)
# rec_songs1[1][0]

### 1. Run lookup_song() - finds which song data to use based on a track_id JSON payload

In [42]:
def lookup_song(track_id):
    """
    Fetch the row(s) of the pickled song table that belong to a track id.

    The song table is read from 'song_list.joblib' on every call.

    :param track_id: track id to search for in the 'track_id' column
    :return: the rows whose 'track_id' equals the given id, with all of
             their song information (no rows when nothing matches)
    """
    catalogue = joblib.load('song_list.joblib')
    # Boolean mask: True for every row carrying the requested track id.
    is_requested = catalogue['track_id'] == track_id
    return catalogue[is_requested]

### 2. Run create_predicts() - Preprocess the selected song and find the row indexes of its k nearest neighbors

In [102]:
def create_predicts(liked_song):
    """
    Recommend songs that are close to a liked (seed) song.

    Loads the fitted pipeline from 'knn_pipeline.joblib', strips the
    descriptive columns from the seed row, transforms what is left with the
    pipeline's first step and queries the second step for its neighbours.

    :param liked_song: df row for the user's liked song, i.e. the song whose
                       track_id arrived in the JSON payload
    :return: row index numbers of the recommended songs: the neighbour
             indexes found for the seed row, minus the first one
    """
    pipeline = joblib.load('knn_pipeline.joblib')
    seed_features = liked_song.drop(
        columns=['artist_name', 'track_id', 'track_name']
    )
    neighbours = pipeline[1].kneighbors(pipeline[0].transform(seed_features))
    # Element 1 of the kneighbors result holds the neighbour indexes, one row
    # per query sample; the seed is a single row, so take row 0.
    neighbour_indexes = neighbours[1][0]
    # Skip the first hit: presumably the seed song itself, which is its own
    # nearest neighbour when it is part of the fitted data.
    return neighbour_indexes[1:]

### 3. Run deliver() - Takes the k row indexes from create_predicts() and finds the associated artist and track names, which are then sent off as a JSON payload.

In [177]:
def deliver(song_list):
    """
    Look up the artist and track name of each recommended song.

    The song table is read from 'song_list.joblib' on every call.

    :param song_list: iterable of positional row indexes into the pickled
                      song table (not track ids), e.g. the index numbers
                      returned by create_predicts
    :return: DataFrame with the columns 'artist_name' and 'track_name', one
             row per requested index in the order given; an empty song_list
             gives an empty frame that still carries both columns
    :raises IndexError: if an index lies outside the song table
    """
    potential_songs = joblib.load('song_list.joblib')
    potential_songs = potential_songs[['artist_name', 'track_name']]
    # One positional lookup selects all requested rows at once, instead of
    # collecting them row by row and rebuilding a frame (which also dropped
    # the column names when song_list was empty).
    return potential_songs.iloc[list(song_list)]

Create JSON sender

In [180]:
def send_json(df_json):
    """
    Serialise a DataFrame of songs into a JSON string.

    :param df_json: DataFrame to serialise
    :return: JSON string in 'records' orientation, i.e. a list holding one
             object per row
    """
    return df_json.to_json(orient='records')

## Test Data Flow

In [181]:
# End-to-end check of the flow: look up -> predict -> deliver -> serialise.
# The seed track id is "Lactose" by Chris Cooq (row 3 of the sample rows
# shown after loading the CSV).
sent_track_id = '3J2Jpw61sO7l6Hc7qdYV91'
# Row(s) of the song table for the seed track.
json_song = lookup_song(sent_track_id)
# Row indexes of the recommended songs.
song_predictions = create_predicts(json_song)
# Artist and track names for those indexes, as a DataFrame.
payload = deliver(song_predictions)
# JSON string ready to be sent; printed in the next cell.
a_json = send_json(payload)

In [183]:
# Show the JSON string produced by the data-flow test cell.
print(a_json)

[{"artist_name":"Mamuton","track_name":"Mamut - Original mix"},{"artist_name":"Coffee Grind","track_name":"Cravings"},{"artist_name":"Zytrex","track_name":"Bordering - Original mix"}]
