# SpotiFinder


---

## Spotify request class

In [1]:
import sys
import json
import timeit
import spotipy
import spotipy.util as util
from dotenv import load_dotenv


class SpotiFinder():
    '''Small convenience wrapper around a spotipy client.

    Call `authenticate()` first; every other method uses the client it
    stores on `self.sp`.

    Attributes:
        sp:       the `spotipy.Spotify` client, or None before authentication.
        token:    the access token returned by spotipy, or None.
        username: the Spotify username the token was requested for, or None.
    '''
    # Runs once, when the class body is executed: loads variables from a
    # local .env file into the environment (presumably the Spotify client
    # credentials that spotipy reads -- confirm against the project's .env).
    load_dotenv()


    def __init__(self):
        self.sp       = None
        self.token    = None
        self.username = None


    def authenticate(self, username = 'a4kqw8b9sws8zwd5bfjdjy4zk', scope = 'user-library-read'):
        '''Request a user token and build the Spotify client.

        The username needs to be the one written on the top right of spotify.
        There is no known way to discover it automatically, so it has to be
        supplied by the caller (the default keeps the previously hard-coded
        account so existing calls behave as before).

        Args:
            username: Spotify username to authorise.
            scope:    permission scope to request for the token.
        '''
        self.username = username
        self.token    = util.prompt_for_user_token(self.username, scope)
        self.sp       = spotipy.Spotify(auth = self.token)


    def get_saved_tracks(self, limit = 20):
        '''Return the track ids of the user's saved tracks.

        Only a single page of at most `limit` items is requested, so this is
        not necessarily the whole library.
        '''
        saved_tracks = self.sp.current_user_saved_tracks(limit = limit)
        return [item['track']['id'] for item in saved_tracks['items']]


    def track_list_features(self, track_id_list):
        '''Fetch audio features for a list of track ids.

        The ids are sent in batches of 100. Tracks for which no feature
        entry comes back (None) are dropped, so the result may be shorter
        than the input.

        Returns:
            A list with one audio-features entry per track that had features,
            for use with the nearest neighbors model.
        '''
        rows      = []
        batchsize = 100

        for start in range(0, len(track_id_list), batchsize):
            batch           = track_id_list[start: start + batchsize]
            feature_results = self.sp.audio_features(batch)
            rows.extend(features for features in feature_results if features is not None)

        return rows


    def get_saved_playlists(self, limit = 50):
        '''Return a dict mapping playlist name to the track ids inside it.

        Only a single page of at most `limit` playlists is requested for
        `self.username`. Playlists that share a name overwrite each other,
        because the name is the dictionary key.
        '''
        playlists      = self.sp.user_playlists(self.username, limit = limit)
        dict_playlists = {}

        for playlist in playlists['items']:
            # Use the authenticated client stored on the instance, not a global.
            items = self.sp.playlist(playlist['id'], fields = "tracks,next")['tracks']['items']

            # A fresh list per playlist, so playlists do not share (and keep
            # extending) one common list. Entries without a track object are
            # skipped instead of raising a TypeError.
            dict_playlists[playlist['name']] = [item['track']['id']
                                                for item in items
                                                if item['track'] is not None]

        return dict_playlists


    def track_lookup(self, track_ids):
        '''Look up a track by its track id and return spotipy's result.'''
        return self.sp.track(track_ids)

### Manual Control

In [17]:
from dotenv import load_dotenv
import spotipy
import spotipy.util as util
# from spotipy.oauth2 import SpotifyClientCredentials



# Load variables from a local .env file into the environment before
# contacting Spotify.
load_dotenv()

# Account to authorise and the permission scope requested for it.
username, scope = 'a4kqw8b9sws8zwd5bfjdjy4zk', 'user-library-read'

# Ask spotipy for a user token, then build a client that uses it.
token = util.prompt_for_user_token(username, scope)
sp    = spotipy.Spotify(auth=token)

#### Save JSON

In [None]:
# import json


# with open('song_data.json', 'w') as outfile:
#     json.dump(json_data, outfile)

---

## Nearest Neighbors Model

In [16]:
from sklearn.neighbors import NearestNeighbors
import pandas as pd


# Reference table of tracks and their audio features, downloaded as CSV.
df = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

# Identifier column and the numeric columns the model is fitted on.
target   = 'track_id'
features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'valence',
]

X, y = df[features], df[target]

# Build a kd-tree index that answers 10-nearest-neighbour queries.
nearest = 10
nn      = NearestNeighbors(n_neighbors=nearest, algorithm='kd_tree')
nn.fit(X, y)

NearestNeighbors(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                 radius=1.0)

In [37]:
# Save
import joblib


# Write the fitted model object `nn` to disk with joblib so that later
# sessions can reuse it without refitting.
filename = 'Nearest_Neighbors.sav'
joblib.dump(nn, filename)
print('Model saved!')

Model saved!


In [2]:
# Load
import joblib


# Read the model back from the same file name the save cell writes to.
# NOTE: joblib.load unpickles the file, so only load files you trust.
filename = 'Nearest_Neighbors.sav'
nn       = joblib.load(filename)
print('Model Loaded.')

Model Loaded.


### Model function predictor 9001

In [3]:
def nn_predict(list_track_ids, output = 'df', save = False):
    '''Recommend tracks from the reference dataset for a list of track ids.

    Audio features for `list_track_ids` are fetched through `SpotiFinder`,
    the module-level nearest-neighbours model `nn` is queried with them, and
    the matching rows of the reference CSV are returned.

    Args:
        list_track_ids: Spotify track ids to base the recommendation on.
        output: shape of the result:
            'track_ids' -- the `track_id` column of the matching rows,
            'df'        -- the matching rows as a DataFrame,
            'json'      -- the matching rows as a JSON-compatible dict.
        save: when truthy and `output == 'json'`, the result is also written
            to 'song_data.json' in the working directory.

    Returns:
        The recommendations in the form selected by `output`.

    Raises:
        ValueError: if `output` is not one of the supported values. This is
            checked first, before any download or authentication happens.
    '''
    valid_outputs = ('track_ids', 'df', 'json')
    if output not in valid_outputs:
        raise ValueError(f'output must be one of {valid_outputs}, got {output!r}')

    import pandas as pd
    import json


    df       = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

    target   = 'track_id'
    features = ['acousticness'
               ,'danceability'
               ,'energy'
               ,'instrumentalness'
               ,'key'
               ,'liveness'
               ,'loudness'
               ,'mode'
               ,'speechiness'
               ,'tempo'
               ,'valence'
               ]

    # Needs to be able to grab track info from spotify
    spot = SpotiFinder()
    spot.authenticate()

    list_tracks_unclean   = spot.track_list_features(list_track_ids)
    df_tracks_features    = pd.DataFrame(list_tracks_unclean)[features]

    # kneighbors() returns (distances, indices); [1] selects the indices.
    # NOTE(review): [0] keeps only the neighbours of the FIRST input track,
    # the remaining input tracks do not influence the result -- confirm that
    # this is intended.
    predictions           = nn.kneighbors(df_tracks_features)[1][0]
    df_predictions        = df.iloc[predictions]
    pred_series_track_ids = df_predictions[target]

    def get_track_info(track_ids):
        # Currently unused: alternative result built from live Spotify
        # lookups instead of the CSV rows. Collects the lookups into one
        # DataFrame (DataFrame.append no longer exists in pandas 2.x).
        return pd.DataFrame([spot.track_lookup(track_id) for track_id in track_ids])


    if output == 'track_ids':
        return pred_series_track_ids

    if output == 'df':
        return df_predictions

    # output == 'json'
    json_data = json.loads(df_predictions.to_json())

    if save:
        with open('song_data.json', 'w') as outfile:
            json.dump(json_data, outfile)

    return json_data
    

In [4]:
# Authenticate against Spotify and collect the ids of the user's saved
# tracks (get_saved_tracks is called with its default limit).
spot       = SpotiFinder()
spot.authenticate()
track_list = spot.get_saved_tracks()

In [5]:
# Request the recommendations as a JSON-style dict; save = True also writes
# them to song_data.json.
nn_predict(track_list, 'json', save = True)

{'artist_name': {'96981': 'Mötley Crüe',
  '92537': 'INTERCOM',
  '92995': 'Rudimental',
  '47922': 'Loudar',
  '110439': 'TINI',
  '14677': 'Melendi',
  '15724': 'Cloudsz',
  '85715': 'Hurshel',
  '20315': 'Jadel Wide',
  '63125': 'Gavlyn'},
 'track_id': {'96981': '3U5qQ9igqDatONCgFOj6Rb',
  '92537': '123QYCPba7ukesKyFDPq9J',
  '92995': '1mR6MzJg79TAROh1lIsx3P',
  '47922': '6h3Iq2HIQauDoncGLEEktP',
  '110439': '6JpjnU0v7J3r7iHcWxLo6P',
  '14677': '6UfiaiKYlWlrfzeX4d31N1',
  '15724': '3a7W3tdbf702SWP4j4Kaan',
  '85715': '56grSrz0XTwvfMaCPumnV8',
  '20315': '6mGdr544eFo0C3ptj37squ',
  '63125': '4S1gqHR7zroJO9TM2E9wju'},
 'track_name': {'96981': 'Kickstart My Heart',
  '92537': 'Truth and Malice',
  '92995': 'Adrenaline (feat. OLIVIA)',
  '47922': 'Reflecting Fragments - Original',
  '110439': 'Princesa',
  '14677': 'Para Que No Se Escapen Tus Mariposas',
  '15724': 'Let You Go',
  '85715': 'Fair',
  '20315': 'Lentamente',
  '63125': 'What You Want'},
 'acousticness': {'96981': 0.00169,


---

### Neural Net Model (Failed)

In [None]:
# Restore the saved network from disk. Keras models have no `.load()`
# method; `load_model` reads a saved model file back and returns the model.
from tensorflow.keras.models import load_model

model = load_model('Sequential_model.h5')
print('Loaded model!')

In [None]:
import pandas as pd
import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import wandb
from wandb.keras import WandbCallback
from category_encoders import TargetEncoder
from category_encoders import OrdinalEncoder
from tensorflow import keras


# Seed NumPy's random number generator.
seed = 7
numpy.random.seed(seed)

# Hyperparameters read by the compile and fit cells further down.
batch_size, epochs, optimizer = 100, 1, 'adam'

print('Building model...')
# Fully connected stack: 13 inputs -> 13 -> 28 -> 56 -> 130663 outputs,
# sigmoid activation on every layer.
# NOTE(review): the compile cell uses sparse_categorical_crossentropy; a
# softmax output layer is the usual pairing for that loss -- confirm whether
# the sigmoid output is intended.
model = Sequential([
    Dense(13, input_dim=13, activation='sigmoid'),   # input
    Dense(28, activation='sigmoid'),                 # hidden
    Dense(56, activation='sigmoid'),                 # hidden
    Dense(130663, activation='sigmoid'),             # output
])
print('Model set.')

In [None]:
# Configure the loss, the optimizer chosen in the build cell and the metric
# to report.
print('Compiling...')
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

print('Finished.')

In [None]:
# Write the current model to 'Sequential_model.h5' in the working directory.
model.save('Sequential_model.h5')
print('Saved model!')

In [None]:
from tensorflow.keras.models import load_model


# Read the saved model back and print its layer overview.
model = load_model('Sequential_model.h5')
model.summary()

# Evaluate on the held-out data and report the second metric (index 1) as a
# percentage with two decimals.
score = model.evaluate(X_test, y_test, verbose=0)
print(f'{model.metrics_names[1]}: {score[1] * 100:.2f}')

In [None]:
# Run the model on X_test; as the last expression of the cell the result is
# displayed by the notebook.
model.predict(X_test)

In [49]:
# Train with the hyperparameters from the build cell and keep the returned
# training history; progress output is switched off.
print('Building hist...')
history = model.fit(
    X_train,
    y_train,
#    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=False,
)
print('Hist complete.')

model.summary()

Building hist...
Hist complete.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 13)                182       
_________________________________________________________________
dense_9 (Dense)              (None, 28)                392       
_________________________________________________________________
dense_10 (Dense)             (None, 56)                1624      
_________________________________________________________________
dense_11 (Dense)             (None, 130663)            7447791   
Total params: 7,449,989
Trainable params: 7,449,989
Non-trainable params: 0
_________________________________________________________________
