# Predict a Genre

This notebook is the last of five notebooks containing the central work for the project.

This notebook contains an application that predicts the genre of a Spotify track.

In [None]:
# Best estimator - Random Forest w/ NoOutlierHandling, RobustScaling, BinningFalse, FeatureSelectionFalse
# IMPORTANT: when trying this out without Google Drive, point results_dir at the
# local results folder (or override the two paths below individually)
results_dir = "/content/drive/My Drive/Spotify Song Classification/data/results"

# Pickled result file of the best estimator
# NOTE(review): the "Forrest" spelling in the file name is kept as-is - presumably it matches the file on disk
estimator_path = results_dir + "/estimators/Random Forest/0.67450_Random Forrest_NoOutlierHandling_RobustScaling_BinningFalse_FeatureSelectionFalse.result"

# Corresponding scaler that has been fitted to the correct dataset
scaler_path = results_dir + "/robust_scaler.obj"

In [None]:
# Imports
# Mount Google Drive when running inside Colab. Outside Colab the
# google.colab import fails and the mount is deliberately skipped, so the
# notebook can still run against local paths.
try:
  from google.colab import drive
  drive.mount('/content/drive')
except ImportError:
  pass

import pandas as pd
import numpy as np
import os
import pickle
from sklearn.preprocessing import RobustScaler

!pip install spotipy
import spotipy

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def get_best_estimator(path=None):
  """Load the pickled result file and return the object stored under its "grid" key.

  Args:
    path: Location of the pickled result file. Defaults to the notebook-level
      `estimator_path` when omitted.

  Returns:
    The value stored under the "grid" key of the unpickled result.
  """
  if path is None:
    # The notebook configures this location as `estimator_path`
    path = estimator_path
  # NOTE: pickle.load can execute arbitrary code - only load result files you trust
  with open(path, "rb") as f:
    result = pickle.load(f)
  return result["grid"]

In [None]:
# Load the best estimator once and keep it under the name `estimator`,
# which predict_track() reads when making predictions
estimator = get_best_estimator()
display(estimator)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=25, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

# Application: Predict a track's genre

In [None]:
# Attribute definitions
# Fields read from the Spotify audio-features response for a track
api_fields = ['energy', 'liveness', 'tempo', 'speechiness', 'acousticness', 'instrumentalness', 'time_signature', 'danceability', 'key', 'duration_ms', 'loudness', 'valence', 'mode']
# Fields passed through the scaler
numerical = ['energy', 'liveness', 'tempo', 'speechiness', 'acousticness', 'instrumentalness', 'danceability', 'duration_ms', 'loudness', 'valence']
# Fields that are one-hot encoded (order matches the prefixes used in predict_track)
categorical = ['mode', 'time_signature', 'key']
# Column layout handed to the estimator: numerical fields followed by the one-hot columns
dataset_columns = numerical + ['IsMode_0.0', 'IsMode_1.0', 'IsTimeSig_1.0', 'IsTimeSig_3.0', 'IsTimeSig_4.0', 'IsTimeSig_5.0', 'IsKey_0.0', 'IsKey_1.0', 'IsKey_2.0', 'IsKey_3.0', 'IsKey_4.0', 'IsKey_5.0', 'IsKey_6.0', 'IsKey_7.0', 'IsKey_8.0', 'IsKey_9.0', 'IsKey_10.0', 'IsKey_11.0']

# Spotify client credentials
# SECURITY: credentials must not be committed with the notebook. Set the
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment variables before
# running. The credentials previously hard-coded here should be treated as
# leaked and rotated in the Spotify developer dashboard.
# When a value is None, spotipy itself looks for the same environment
# variables and raises an error if they are missing.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

# Track Prediction
def _audio_features_to_frame(audio_features):
  """Convert one Spotify audio-features dict into a single-row frame laid out as dataset_columns."""
  # Cast to float so the one-hot column names carry the ".0" suffix used in dataset_columns
  row = {field: float(audio_features[field]) for field in api_fields}
  df = pd.DataFrame(row, index=[0])

  # One-hot encode the categorical fields
  df = pd.get_dummies(df, columns=categorical, prefix=["IsMode", "IsTimeSig", "IsKey"], dtype=float)

  # Align to the expected layout: one-hot columns this track does not have are
  # filled with 0, and a category value not listed in dataset_columns is dropped
  # (the track then has all zeros in that group) instead of adding an extra
  # column that would no longer match the estimator's input width.
  return df.reindex(columns=dataset_columns, fill_value=0.0)


def predict_track(track_id):
  """Predict the genre of a Spotify track from its audio features.

  Relies on the notebook-level names `client_id`, `client_secret`,
  `scaler_path` and `estimator`; `estimator` must be assigned before calling
  (e.g. `estimator = get_best_estimator()`).

  Args:
    track_id: Spotify id of the track to classify.

  Returns:
    The result of `estimator.predict` for the single track.

  Raises:
    ValueError: If Spotify returns no audio features for the track.
  """
  # Connect to Spotify and collect audio features for track
  auth_manager = spotipy.oauth2.SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
  spotify = spotipy.Spotify(auth_manager=auth_manager)
  audio_features = spotify.audio_features(track_id)[0]
  if audio_features is None:
    # A null entry means no features are available; fail with a clear message
    # instead of a TypeError further down
    raise ValueError("Spotify returned no audio features for track {!r}".format(track_id))

  # Preprocessing
  df = _audio_features_to_frame(audio_features)

  # NOTE: pickle.load can execute arbitrary code - only load scaler files you trust
  with open(scaler_path, "rb") as f:
    scaler = pickle.load(f)
  df[numerical] = scaler.transform(df[numerical])

  # Predict
  return estimator.predict(df)

In [None]:
# Try the classifier on a few well-known tracks: (label, Spotify track id)
demo_tracks = [
  ("We will rock you (Queen)", "03jhnLcIT8C4DhXnNecOZv"),
  ("Für Elise (Beethoven)", "67hXs9oizk6evBJxuk42cb"),
  ("Take me home, country roads (John Denver)", "39q7xibBdRboeMKUbZEB6g"),
  ("In Memoriam (Moonspell)", "6cmRAqDpT2iK60AOjGMCxW"),
]
for label, demo_track_id in demo_tracks:
  print(label, predict_track(demo_track_id))

We will rock you (Queen) ['Rock']
Für Elise (Beethoven) ['Classical Music']
Take me home, country roads (John Denver) ['Country']
In Memoriam (Moonspell) ['Metal']
