In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

In [2]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the track table (metadata columns followed by audio/temporal features).
csv_path = 'fma_metadata/df.csv'
df = pd.read_csv(csv_path)

In [4]:
# Preview the first five rows to sanity-check the column layout.
df.head(n=5)

Unnamed: 0,track_id,track_title,track_genre_top,audio_features_acousticness,audio_features_danceability,audio_features_energy,audio_features_instrumentalness,audio_features_liveness,audio_features_speechiness,audio_features_tempo,...,temporal_features_214,temporal_features_215,temporal_features_216,temporal_features_217,temporal_features_218,temporal_features_219,temporal_features_220,temporal_features_221,temporal_features_222,temporal_features_223
0,2,Food,Hip-Hop,0.416675,0.675894,0.634476,0.010628,0.177647,0.15931,165.922,...,-1.992303,6.805694,0.23307,0.19288,0.027455,0.06408,3.67696,3.61288,13.31669,262.929749
1,3,Electric Ave,Hip-Hop,0.374408,0.528643,0.817461,0.001851,0.10588,0.461818,126.957,...,-1.582331,8.889308,0.258464,0.220905,0.081368,0.06413,6.08277,6.01864,16.673548,325.581085
2,5,This World,Hip-Hop,0.043567,0.745566,0.70147,0.000697,0.373143,0.124595,100.26,...,-2.288358,11.527109,0.256821,0.23782,0.060122,0.06014,5.92649,5.86635,16.013849,356.755737
3,134,Street Music,Hip-Hop,0.452217,0.513238,0.56041,0.019443,0.096567,0.525519,114.29,...,-1.452696,2.356398,0.234686,0.19955,0.149332,0.0644,11.26707,11.20267,26.45418,751.147705
4,139,CandyAss,Folk,0.10655,0.260911,0.607067,0.835087,0.223676,0.030569,196.961,...,-3.078667,12.411567,0.270802,0.2727,0.025242,0.06404,2.43669,2.37265,3.897095,37.866043


In [5]:
# Locate the target by name instead of by hard-coded position, so the
# selection keeps working if the CSV gains or loses a leading column such as
# the saved index ("Unnamed: 0"). Every column to the right of the target is
# treated as a feature, exactly as before.
target_col = 'track_genre_top'
first_feature_pos = df.columns.get_loc(target_col) + 1
X, y = df.iloc[:, first_feature_pos:], df[target_col]

In [6]:
# Hold out 33% of the tracks for final evaluation; the fixed seed keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# KNN

In [7]:
# KNN is distance-based, and the feature columns live on very different
# scales (in the preview, tempo is in the hundreds while acousticness is a
# fraction below 1), so unscaled distances are dominated by a few columns.
# Standardizing inside a pipeline also means each cross-validation fold is
# scaled using only its own training portion.
KNN = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsClassifier()),
])

# Grid keys for a pipeline are written as "<step name>__<parameter>".
parameters = {"knn__n_neighbors": [3, 6, 10, 15]}

clf = GridSearchCV(estimator=KNN, param_grid=parameters, scoring='accuracy', verbose=1)

In [10]:
# Run the cross-validated search over the parameter grid on the training split.
clf.fit(X=X_train, y=y_train)

In [11]:
# Best mean cross-validated accuracy and the estimator that achieved it.
(clf.best_score_, clf.best_estimator_)

AttributeError: 'GridSearchCV' object has no attribute 'best_score_'

In [None]:
# The search was created with GridSearchCV's default refit=True, so
# best_estimator_ has already been retrained on all of X_train with the
# winning parameters; fitting it a second time would only repeat that work.
KNN = clf.best_estimator_
KNN

In [None]:
# Score the tuned KNN model on the held-out split.
y_pred = KNN.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion table: rows are the true genres, columns are the genres KNN predicted.
KNN_pred = pd.crosstab(
    index=y_test,
    columns=y_pred,
    rownames=['Actual Genres'],
    colnames=['Predicted Genres'],
)
KNN_pred

# Random Forest

In [None]:
# Random forests are stochastic (bootstrap samples, random feature subsets).
# Seed the estimator so the grid search and the reported accuracy are
# repeatable; 42 matches the seed used for the train/test split.
RFC = RandomForestClassifier(random_state=42)

In [None]:
# Candidate forest sizes and tree depths to try (4 x 4 = 16 combinations).
parameters = {
    "n_estimators": [10, 20, 30, 40],
    "max_depth": [10, 20, 30, 40],
}

In [None]:
# Cross-validated search over the forest grid, ranked by accuracy.
clf = GridSearchCV(
    estimator=RFC,
    param_grid=parameters,
    scoring='accuracy',
    verbose=1,
)

In [None]:
# Run the cross-validated search over the parameter grid on the training split.
clf.fit(X=X_train, y=y_train)

In [None]:
# Best mean cross-validated accuracy and the estimator that achieved it.
(clf.best_score_, clf.best_estimator_)

In [None]:
# The search was created with GridSearchCV's default refit=True, so
# best_estimator_ has already been retrained on all of X_train with the
# winning parameters; fitting it a second time would only repeat that work.
RFC = clf.best_estimator_
RFC

In [None]:
# Predict genres for the held-out tracks with the tuned forest.
y_pred = RFC.predict(X=X_test)

In [None]:
# Fraction of held-out tracks whose genre was predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Confusion table: rows are the true genres, columns are the genres the forest predicted.
RFC_pred = pd.crosstab(
    index=y_test,
    columns=y_pred,
    rownames=['Actual Genres'],
    colnames=['Predicted Genres'],
)
RFC_pred

# Gradient Boosting

In [None]:
# subsample < 1 and max_features='sqrt' both inject randomness into training,
# so seed the model to make the accuracy and feature importances repeatable;
# 42 matches the seed used for the train/test split.
GBC = GradientBoostingClassifier(
    min_samples_split=100,
    min_samples_leaf=30,
    max_depth=5,
    max_features='sqrt',
    subsample=0.8,
    random_state=42,
)

In [None]:
# Train the boosted model on the training split.
GBC.fit(X=X_train, y=y_train)

In [None]:
# Predict genres for the held-out tracks with the boosted model.
y_pred = GBC.predict(X=X_test)

In [None]:
# Fraction of held-out tracks whose genre was predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Plot the strongest features according to the fitted model's
# feature_importances_.
top_n = 9  # number of bars to show
features = X_train.columns.values
importances = GBC.feature_importances_
# argsort is ascending, so reverse it and keep the first top_n positions.
indices = np.argsort(importances)[::-1][:top_n]

fig, ax = plt.subplots()
bar_positions = range(len(indices))
ax.barh(bar_positions, importances[indices], color='r', align='center')
ax.set_yticks(bar_positions)
# Label each bar with the feature it belongs to, in the same sorted order.
ax.set_yticklabels(features[indices])
# barh puts position 0 at the bottom; flip so the most important feature is on top.
ax.invert_yaxis()
ax.set_title('Feature Importances')
ax.set_xlabel('Relative Importance')
plt.show()

In [None]:
# Confusion table: rows are the true genres, columns are the genres the boosted model predicted.
GBC_pred = pd.crosstab(
    index=y_test,
    columns=y_pred,
    rownames=['Actual Genres'],
    colnames=['Predicted Genres'],
)
GBC_pred

In [None]:
# Divide every row by its total so each cell is the share of that actual
# genre assigned to each predicted genre, rounded to two decimals.
row_totals = GBC_pred.sum(axis=1)
GBC_pred.div(row_totals, axis=0).round(2)

In [None]:
import seaborn as sns

In [None]:
def _row_normalize(confusion):
    """Scale each row of a confusion table so it sums to 1 (share per actual genre)."""
    return confusion.div(confusion.sum(axis=1), axis=0)


confusions = [('KNN', KNN_pred), ('RFC', RFC_pred), ('GBC', GBC_pred)]

# A crosstab only has a column for genres a model actually predicted, so the
# three tables may have different column sets. The panels share an x axis, so
# give every table the same columns first. A plain list (not an Index) is
# passed to reindex so the 'Predicted Genres' axis name is kept.
predicted_genres = sorted(set().union(*(table.columns for _, table in confusions)))

fig, (ax0, ax1, ax2) = plt.subplots(1, 3, sharex=True, sharey=True)
cbar_ax = fig.add_axes([.91, .3, .03, .4])
for position, (ax, (title, confusion)) in enumerate(zip((ax0, ax1, ax2), confusions)):
    aligned = confusion.reindex(columns=predicted_genres, fill_value=0)
    # All panels use the same 0-1 colour scale, so one colorbar serves them all;
    # draw it once instead of repainting the same axes for every panel.
    draw_cbar = position == 0
    sns.heatmap(
        _row_normalize(aligned),
        ax=ax,
        vmin=0,
        vmax=1,
        cmap="YlGnBu",
        cbar=draw_cbar,
        cbar_ax=cbar_ax if draw_cbar else None,
    )
    ax.set_title(title)
fig.suptitle('Classifier comparison', fontsize=15)

plt.show()