# Spotify Classification
## a.k.a. How I can tell if I'll like a song or if it's too classical for me
For this (small) project, I use a simple KNN classifier to determine if I'd like a song. I use my top 50 (actually 47 in this case) songs as the "good" training set and the top 50 results for "classical" on Spotify for my "bad" training set.

In [None]:
# I'm using spotipy to hook into the Spotify API and am using 
# pandas to manipulate the data with sklearn as my machine learning platform
import spotipy
import pandas as pd
import spotipy.oauth2 as oauth2
import sklearn
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Credentials and settings used to authenticate against the Spotify API.
# They are intentionally blank here: fill in your own values before running
# the cells below, and do not commit real secrets.
CLIENT_ID = ""
CLIENT_SECRET = ""
REDIRECT_URI = ""  # NOTE(review): presumably must match a redirect URI registered for the app — confirm

# Account the token is requested for, and the permission it is requested with.
username = ""
scope = "user-top-read"

In [None]:
# Create the auth token (a user-authorised token is needed because I want my
# own top songs).
# The submodule is imported explicitly: attribute access such as
# `spotipy.util` only works once something has imported that submodule,
# which a plain `import spotipy` is not guaranteed to do.
import spotipy.util

token = spotipy.util.prompt_for_user_token(
    username,
    scope,
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
)

In [None]:
# Build the Spotify API client, authenticated with the token obtained above
sp = spotipy.Spotify(auth=token)

In [None]:
# Ask the API for up to 50 of my top tracks.
# This is the raw query response; the tracks are pulled out of it afterwards.
top_tracks = sp.current_user_top_tracks(limit=50)

In [None]:
# Get the actual tracks from the query: they are stored under the "items"
# key of the response
top_tracks_items = top_tracks["items"]

In [None]:
# Organize track IDs and names as two parallel lists (same order as the
# tracks in the query result), built with comprehensions rather than
# manual append loops.
track_id_list = [track["id"] for track in top_tracks_items]
track_name_list = [track["name"] for track in top_tracks_items]

In [None]:
# Get the audio features for all collected track IDs in a single call; these
# features are what the classifier is trained on.
# NOTE(review): presumably one entry per ID, in the same order — confirm
track_features_list = sp.audio_features(track_id_list)

In [None]:
# Two-column table of track information: one row per track, with its ID
# and its name
track_df = pd.DataFrame({"id": track_id_list, "name": track_name_list})

In [None]:
# Create DataFrame to store track features (one row per track).
# The index is left to pandas (0..n-1) instead of being pinned to range(50),
# which raised a ValueError whenever the number of tracks was not exactly 50.
# None entries are skipped so they cannot break the construction.
# NOTE(review): presumably the API yields None for tracks that have no audio
# features — confirm.
track_features_df = pd.DataFrame(
    [features for features in track_features_list if features is not None]
)

In [None]:
# Join the track information table with the feature table on the shared
# "id" column
track_with_features_df = track_df.merge(track_features_df, on="id")

In [None]:
# Drop rows that are exact duplicates, then use the track ID as the index
deduplicated_df = track_with_features_df.drop_duplicates()
track_cleaned_df = deduplicated_df.set_index("id")

In [None]:
# Search Spotify for "classical" and keep the returned tracks (at most 50)
# as the "bad" set
classical_results = sp.search("classical", limit=50)
bad_tracks = classical_results["tracks"]["items"]

In [None]:
# Organize bad track IDs and names as two parallel lists (same order as the
# search results), built with comprehensions rather than manual append loops.
bad_track_id_list = [track["id"] for track in bad_tracks]
bad_track_name_list = [track["name"] for track in bad_tracks]

In [None]:
# Grab the audio features for all bad track IDs in a single call; these
# features are what the classifier is trained on.
# NOTE(review): presumably one entry per ID, in the same order — confirm
bad_track_features_list = sp.audio_features(bad_track_id_list)

In [None]:
# Two-column table of bad track information: one row per track, with its ID
# and its name
bad_track_df = pd.DataFrame({"id": bad_track_id_list, "name": bad_track_name_list})

In [None]:
# Create bad track feature DataFrame (one row per track).
# The index is left to pandas (0..n-1) instead of being pinned to range(50),
# which raised a ValueError whenever the number of tracks was not exactly 50.
# None entries are skipped so they cannot break the construction.
# NOTE(review): presumably the API yields None for tracks that have no audio
# features — confirm.
bad_track_features_df = pd.DataFrame(
    [features for features in bad_track_features_list if features is not None]
)

In [None]:
# Join the bad track information table with its feature table on the shared
# "id" column
bad_track_with_features_df = bad_track_df.merge(bad_track_features_df, on="id")

In [None]:
# Drop rows that are exact duplicates, then use the track ID as the index
bad_deduplicated_df = bad_track_with_features_df.drop_duplicates()
bad_track_cleaned_df = bad_deduplicated_df.set_index("id")

In [None]:
# Select the audio-feature columns used as predictors for the "good" tracks
track_predictors = track_cleaned_df.loc[
    :,
    [
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "valence",
    ],
]

In [None]:
# Select the audio-feature columns used as predictors for the "bad" tracks
bad_track_predictors = bad_track_cleaned_df.loc[
    :,
    [
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "valence",
    ],
]

In [None]:
# Initialize classifier and fit it (label 1 = "good" track, 0 = "bad" track).
# The labels are sized from the predictor frames themselves rather than a
# hard-coded 47/50, so the cell keeps working when de-duplication leaves a
# different number of rows in either set.
# NOTE(review): the features are fed in unscaled; if they are not on
# comparable scales, one feature may dominate the KNN distance — confirm.
training_predictors = pd.concat([track_predictors, bad_track_predictors])
training_labels = [1] * len(track_predictors) + [0] * len(bad_track_predictors)
classifier = KNeighborsClassifier()
classifier.fit(training_predictors, training_labels)

In [None]:
# Search Spotify for "Bach" and take the first track returned as the test track
bach_results = sp.search("Bach")
test_track = bach_results["tracks"]["items"][0]

In [None]:
# Fetch the test track's audio features and load them into a DataFrame
test_track_features = sp.audio_features(test_track["id"])
test_df = pd.DataFrame(test_track_features)

In [None]:
# Keep only the audio-feature columns used as predictors for the test track
test_item = test_df.loc[
    :,
    [
        "danceability",
        "energy",
        "instrumentalness",
        "liveness",
        "loudness",
        "speechiness",
        "valence",
    ],
]

In [None]:
# Predict the label for the test track (1 = Good, 0 = Bad).
# Kept as a bare expression, with the result not assigned to a name.
# NOTE(review): presumably so that the notebook displays the prediction.
classifier.predict(test_item)