# Testing different models

In [1]:
# Import packages
import pandas as pd 
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate, ParameterSampler
from sklearn.feature_selection import RFE

# Read in the data
data = pd.read_csv('./../data/features_v2.csv', index_col=0)
X = data.drop(columns=['track_id', 'genre_code'])
X = pd.get_dummies(X, columns=['major', 'key'], drop_first=True).values
y = data[['genre_code']].values.ravel()


## Neural Network (MLP)

In [2]:
# Get the CV average across 7 folds for an mlp
p_dict = {'hidden_layer_sizes':range(100, 1001, 50), 'alpha':np.linspace(.00001, .005, 10)}
param_grid = ParameterSampler(p_dict, 20)
mlp = MLPClassifier(max_iter=500, warm_start=False)
best = 0
best_params = 0
for params in param_grid:
    mlp.set_params(**params)
    scores = cross_validate(mlp, X, y, cv=5)['test_score']
    if np.mean(scores) > best:
        best = np.mean(scores)
        best_params = params
    print('##', end='')
print('\n\nBest Mean CV Score: ', best)
print('Best Parameters:', best_params)


########################################

Best Mean CV Score:  0.2169398740295288
Best Parameters: {'hidden_layer_sizes': 650, 'alpha': 0.002227777777777778}


## KNN

In [4]:
# Get the average CV score over 5 folds
knn = KNeighborsClassifier()
# Get the CV average across 7 folds for an mlp
p_dict = {'n_neighbors':range(5, 101, 5), 'weights':['uniform', 'distance'], 'algorithm':['ball_tree', 'kd_tree'], 'leaf_size':range(15, 51, 5)}    
param_grid = ParameterSampler(p_dict, 20)
best = 0
best_params = 0
for params in param_grid:
    knn.set_params(**params)
    scores = cross_validate(knn, X, y, cv=5)['test_score']
    if np.mean(scores) > best:
        best = np.mean(scores)
        best_params = params
    print('##', end='')
print('\n\nBest Mean CV Score: ', best)
print('Best Parameters:', best_params)

########################################

Best Mean CV Score:  0.19804845174667207
Best Parameters: {'weights': 'distance', 'n_neighbors': 20, 'leaf_size': 30, 'algorithm': 'ball_tree'}
