In [4]:
import pickle as pkl
import random
from typing import Iterable, Union

from dotenv import dotenv_values
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , roc_auc_score
from sklearn import tree
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Custom packages
import helpers as h
from helpers import Playlist, PlaylistCluster

In [5]:
# Read the Spotify API credentials from the local .env file and build the
# client-credentials client (`sp`) that the rest of the notebook talks to.
config = dotenv_values('.env')
client_id, client_secret = config["SPOTIFY_CLIENT_ID"], config["SPOTIFY_CLIENT_SECRET"]

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [6]:
# Restore the artifacts produced by the previous notebook.
# NOTE: unpickling executes arbitrary code, so only load files you created yourself.

# Trained random forest classifier
with open('./trained_rf.pkl', 'rb') as model_file:
    rf_classifer = pkl.load(model_file)

# Playlist cluster
with open('./playlist_cluster.pkl', 'rb') as cluster_file:
    PLC = pkl.load(cluster_file)

# The first two playlists stored in the cluster
rb_playlist, lnv_playlist = PLC.playlist_list[0], PLC.playlist_list[1]

Let's ask the random forest classifier if we should add a song to the playlist!

In [14]:
class Song():
    '''Access audio features of a Spotify song.

    A song is identified by exactly one of ``song_id``, ``song_name`` or
    ``song_dict``. When several are supplied, ``song_dict`` takes precedence
    over ``song_id``, which takes precedence over ``song_name``.

    All Spotify requests go through the module-level client ``sp``.

    Parameters
    ----------
    song_id : str, optional
        Spotify track id; the track is fetched with ``sp.track``.
    song_name : str, optional
        Free-text track name; the first hit of a track search is used.
    song_dict : dict, optional
        Playlist entry whose track metadata is nested under the ``'track'`` key
        (the layout ``get_info_from_dict`` reads).

    Attributes
    ----------
    input
        The last identifier that was supplied, in signature order.
    id, name
        Track id and track name.
    attributes : dict
        Track metadata (from ``sp.track``, the search hit, or ``song_dict['track']``).
    audio_features : list
        Raw return value of ``sp.audio_features`` for this track.
    data : pandas.DataFrame
        One row indexed by ``(id, name)`` with the audio-feature columns sorted
        alphabetically, plus a ``'like'`` column initialised to NaN.
    ml_likes
        Placeholder for a model prediction, initialised to NaN.
    audio_feature_labels : pandas.Index
        Column labels of ``data``.
    ml_feature_labels : list of str
        Alphabetically sorted feature columns used as model input.
    ml_data : pandas.DataFrame
        ``data`` restricted to ``ml_feature_labels``, in that same order.

    Raises
    ------
    ValueError
        If no identifier is given, the dict carries no track id, the name search
        finds nothing, or no audio features are returned for the track.
    '''

    # Columns of the audio-features table that are identifiers, links or the
    # label itself, and therefore are not fed to the classifier.
    _NON_ML_COLUMNS = frozenset({'type', 'id', 'uri', 'track_href', 'analysis_url', 'like'})

    def __init__(self, song_id:str=None, song_name:str=None, song_dict:dict=None) -> None:
        supplied = [arg for arg in (song_id, song_name, song_dict) if arg is not None]
        if not supplied:
            # Fail early and clearly instead of hitting an AttributeError on self.id below.
            raise ValueError('Song requires one of song_id, song_name or song_dict.')
        # Keep the established convention: remember the last identifier supplied.
        self.input = supplied[-1]

        if song_dict is not None:
            self.id = self.get_info_from_dict(song_dict, 'id')
            self.name = self.get_info_from_dict(song_dict, 'name')
            # Expose the nested metadata so `attributes` exists on every construction path.
            self.attributes = song_dict.get('track')
            if self.id is None:
                raise ValueError("song_dict does not contain a track id under its 'track' entry.")
        elif song_id is not None:
            self.id = song_id
            self.attributes = sp.track(self.id)
            self.name = self.attributes['name']
        else:
            # Resolve a free-text name to a track through the search endpoint (best hit wins).
            matches = sp.search(q=song_name, type='track', limit=1)['tracks']['items']
            if not matches:
                raise ValueError(f'No Spotify track found for name {song_name!r}.')
            self.attributes = matches[0]
            self.id = self.attributes['id']
            self.name = self.attributes['name']

        self.audio_features = sp.audio_features(self.id)
        # Guard against an empty response or a None entry, which would otherwise
        # produce a malformed DataFrame further down.
        if not self.audio_features or self.audio_features[0] is None:
            raise ValueError(f'Spotify returned no audio features for track {self.id!r}.')

        self.data = pd.DataFrame(data=self.audio_features).sort_index(axis='columns')
        # initialize like to none
        self.data['like'] = np.nan
        self.ml_likes = np.nan

        # Index the single row by (id, name) so rows from many songs stay identifiable.
        self.data.index = pd.MultiIndex.from_tuples([(self.id, self.name)], names=['id', 'name'])

        self.audio_feature_labels = self.data.columns

        ## ML specifically (e.g. random forest)
        # Sorted so the label list is deterministic and lines up with the ml_data columns.
        self.ml_feature_labels = sorted(set(self.audio_feature_labels) - self._NON_ML_COLUMNS)
        self.ml_data = self.data.loc[:, self.ml_feature_labels]

    def get_info_from_dict(self, track:dict, info_tag:str):
        '''Unnest information from raw_tracks dict.

        Returns ``track['track'][info_tag]``, or ``None`` when the tag is absent
        or the entry has no (or an empty) ``'track'`` value.
        '''

        nested = track.get('track') or {}
        return nested.get(info_tag)

In [15]:
# Build a Song from the first raw track entry of the first playlist in the cluster.
s = Song(song_dict=rb_playlist.raw_tracks[0])
# Show the single-row feature table; its 'like' column starts out as NaN.
s.data

Unnamed: 0_level_0,Unnamed: 1_level_0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,like
id,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
5awNIWVrh2ISfvPd5IUZNh,PTT (Paint The Town),0.0735,https://api.spotify.com/v1/audio-analysis/5awN...,0.781,201120,0.814,5awNIWVrh2ISfvPd5IUZNh,1.9e-05,8,0.0565,-2.437,1,0.1,124.028,4,https://api.spotify.com/v1/tracks/5awNIWVrh2IS...,audio_features,spotify:track:5awNIWVrh2ISfvPd5IUZNh,0.546,


In [16]:
# Get a song from Spotify and its features
def grab_a_song(search_terms=('love', 'happy', 'dance', 'rock', 'jazz'), limit=50):
    '''Return a random Spotify track wrapped in a ``Song``.

    One search term is drawn at random, a track search is run with the
    module-level client ``sp``, and one of the returned tracks is drawn at random.

    Parameters
    ----------
    search_terms : iterable of str, optional
        Candidate search queries; one is chosen at random.
    limit : int, optional
        Maximum number of search results to request.

    Returns
    -------
    Song
        The randomly selected track.

    Raises
    ------
    ValueError
        If ``search_terms`` is empty.
    IndexError
        If the search returns no tracks (same exception type ``random.choice``
        raised before, now with an explanatory message).
    '''
    search_terms = list(search_terms)
    if not search_terms:
        raise ValueError('search_terms must contain at least one term.')

    # Get a random search term
    search_term = random.choice(search_terms)
    results = sp.search(q=search_term, type='track', limit=limit)

    items = results['tracks']['items']
    if not items:
        raise IndexError(f'Spotify search for {search_term!r} returned no tracks.')

    # Get a random track from the search results
    track = random.choice(items)
    return Song(track['id'])

In [18]:
# Pull a random track from a Spotify search and wrap it in a Song.
s = grab_a_song()
# Show only the columns kept as model input (identifier/link columns and 'like' are excluded).
s.ml_data

Unnamed: 0_level_0,Unnamed: 1_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
id,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0BnkfcPXRPqxiqk593hnD9,Dance With Ghosts,0.0774,0.571,161667,0.567,0.000511,10,0.0729,-10.976,0,0.0667,143.996,4,0.202


In [23]:
# Ask the loaded random forest for a prediction on this song's feature row.
# NOTE(review): presumably 1 = add/like and 0 = skip, mirroring the 'like' label — confirm against the training notebook.
rf_classifer.predict(s.ml_data)

array([0.])

We need a way to query a lot of random songs from Spotify.