In [2]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score

from pathlib import Path
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import RandomizedSearchCV


msdi_path = "../data/msdi/"

In [3]:
def load_mfcc(entry, msdi_path):
    """Return the MFCC array of one track, averaged over its first axis.

    Parameters
    ----------
    entry : mapping (e.g. one row of the mapping table)
        Must provide ``'mfcc'`` (archive path relative to ``msdi_path``) and
        ``'msd_track_id'`` (key of the track inside that archive).
    msdi_path : str or Path
        Root directory that ``entry['mfcc']`` is relative to.

    Returns
    -------
    numpy.ndarray
        ``np.mean`` of the stored array along axis 0.
    """
    # np.load on an archive returns a lazy NpzFile that keeps the file open;
    # the context manager closes it instead of leaking one handle per call.
    # NOTE(review): assumes the 'mfcc' files are .npz archives keyed by track
    # id, as the string-key lookup suggests; confirm against the dataset.
    with np.load(Path(msdi_path) / entry['mfcc']) as archive:
        mfcc = archive[entry['msd_track_id']]
    return np.mean(mfcc, axis=0)

In [4]:
# Read the mapping table and discard its 'Unnamed: 0' column
# (presumably an index that was saved into the CSV).
df_entry = (
    pd.read_csv(msdi_path+"msdi_mapping.csv")
    .drop(columns=["Unnamed: 0"])
)

In [5]:
# Sanity check on a single row: the helper should return a 1-D feature vector.
load_mfcc(df_entry.iloc[3], msdi_path).shape

(12,)

In [6]:
# Build the feature table: one averaged MFCC vector per mapping row, expanded
# into one column per coefficient.
# `raw=True` is deliberately not passed: with it pandas hands each row to the
# function as a bare ndarray, while load_mfcc needs label-based access
# (entry['mfcc'], entry['msd_track_id']) that only a Series row provides.
X = df_entry.apply(
    lambda entry: load_mfcc(entry, msdi_path),
    axis=1,
    result_type='expand',
)

In [7]:
# Expect (number of rows in the mapping table, number of MFCC coefficients).
X.shape

(30712, 12)

In [8]:
# Encode the genre strings as integer class ids. The fitted encoder `le` is
# kept so ids can be mapped back to genre names later.
label = df_entry['genre'].array
le = LabelEncoder()
y_classes = le.fit_transform(label)
class_names = le.classes_

print("Classes : ", class_names, "\n")
print(y_classes.shape)

Classes :  ['Blues' 'Country' 'Electronic' 'Folk' 'Jazz' 'Latin' 'Metal' 'New Age'
 'Pop' 'Punk' 'Rap' 'Reggae' 'RnB' 'Rock' 'World'] 

(30712,)


In [9]:
# Convert the feature table to a plain ndarray so it can be indexed with the
# split index lists. np.asarray is used instead of `.values` so the cell can
# be re-run: on a second run X is already an ndarray, which has no `.values`.
X = np.asarray(X)

In [10]:
# Row labels belonging to each predefined split, read from the 'set' column.
split_of_row = df_entry['set']
train_idx, test_idx, val_idx = (
    df_entry.index[split_of_row == split_name].tolist()
    for split_name in ("train", "test", "val")
)

In [11]:
# Slice the features and the encoded labels with the per-split index lists.
X_train, X_test, X_val = (X[rows] for rows in (train_idx, test_idx, val_idx))
y_train, y_test, y_val = (y_classes[rows] for rows in (train_idx, test_idx, val_idx))

## Classifier

### AdaBoost

In [88]:
# Baseline: AdaBoost with 15 estimators and a fixed seed, fitted on the
# training split only.
clf = AdaBoostClassifier(n_estimators=15, random_state=42)
clf.fit(X_train,y_train)


AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=15, random_state=42)

In [89]:
# Predicted class ids for the test split (input to the report further down).
y_pred = clf.predict(X_test)

In [90]:
# Score of the fitted model on the test split.
print(clf.score(X_test,y_test))

0.3405033340503334


In [91]:
# Per-genre precision / recall / F1 on the test split.
print(classification_report(y_test, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.29      0.02      0.04       190
     Country       0.19      0.13      0.15       194
  Electronic       0.51      0.53      0.52       708
        Folk       0.16      0.10      0.12       136
        Jazz       0.31      0.31      0.31       461
       Latin       0.00      0.00      0.00        81
       Metal       0.42      0.72      0.53       374
     New Age       0.05      0.05      0.05        38
         Pop       0.25      0.30      0.27       463
        Punk       0.12      0.03      0.05        95
         Rap       0.26      0.54      0.35       379
      Reggae       0.28      0.33      0.30       263
         RnB       0.29      0.01      0.01       395
        Rock       0.37      0.38      0.38       827
       World       0.00      0.00      0.00        45

    accuracy                           0.34      4649
   macro avg       0.23      0.23      0.21      4649
weighted avg       0.32   

  _warn_prf(average, modifier, msg_start, len(result))


In [92]:
# Predict every row of the dataset (train, val and test together), so the
# report below includes rows the model was fitted on.
Y_pred = clf.predict(X)

# Re-encode the genre column with the fitted encoder to get matching ids.
Y_true = le.transform(df_entry["genre"].values)
print(classification_report(Y_true, Y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.29      0.05      0.08       825
     Country       0.25      0.17      0.20      1778
  Electronic       0.52      0.49      0.50      4801
        Folk       0.18      0.12      0.14      1144
        Jazz       0.31      0.34      0.32      2671
       Latin       0.00      0.00      0.00       550
       Metal       0.43      0.68      0.53      2617
     New Age       0.14      0.16      0.15       265
         Pop       0.27      0.28      0.27      3429
        Punk       0.17      0.03      0.05       713
         Rap       0.28      0.51      0.36      2674
      Reggae       0.26      0.31      0.28      1699
         RnB       0.22      0.01      0.01      1836
        Rock       0.32      0.37      0.34      5213
       World       0.00      0.00      0.00       497

    accuracy                           0.34     30712
   macro avg       0.24      0.23      0.22     30712
weighted avg       0.31   

  _warn_prf(average, modifier, msg_start, len(result))


### Random Forest

In [1]:
# Random forest with default hyperparameters and a fixed seed, fitted on the
# training split.
# NOTE: this rebinds `clf`, replacing the AdaBoost model fitted earlier; when
# the notebook is run top to bottom, every later use of `clf` (the search
# estimator and the export section) refers to this random forest.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train,y_train)


NameError: name 'RandomForestClassifier' is not defined

In [36]:
# Predictions on the training split itself: the report built from them shows
# how well the model fits data it has seen, not how well it generalizes.
y_pred = clf.predict(X_train)

In [37]:
# Score of the fitted model on the test split.
print(clf.score(X_test,y_test))

0.4317057431705743


In [38]:
# Per-genre report on the training split (y_pred holds training predictions).
print(classification_report(y_train, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       1.00      1.00      1.00       515
     Country       1.00      1.00      1.00      1343
  Electronic       1.00      1.00      1.00      3373
        Folk       1.00      1.00      1.00       844
        Jazz       1.00      1.00      1.00      1837
       Latin       1.00      1.00      1.00       387
       Metal       1.00      1.00      1.00      1731
     New Age       1.00      1.00      1.00       156
         Pop       1.00      1.00      1.00      2322
        Punk       1.00      1.00      1.00       486
         Rap       1.00      1.00      1.00      1917
      Reggae       1.00      1.00      1.00      1246
         RnB       1.00      1.00      1.00      1219
        Rock       1.00      1.00      1.00      3678
       World       1.00      1.00      1.00       329

    accuracy                           1.00     21383
   macro avg       1.00      1.00      1.00     21383
weighted avg       1.00   

## Hyperparameter tuning

In [53]:
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 5, stop = 50, num = 10)]
# Number of features to consider at every split
max_features = ['auto']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(2, 10, num = 5)]
# Minimum number of samples required to split a node

max_leaf_nodes = [2, 5, 9, 13, 17]
min_samples_split = [2, 5, 7]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'max_leaf_nodes' : max_leaf_nodes,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}

In [54]:
# Randomized search over `random_grid`: 50 sampled configurations, each
# evaluated with 5-fold cross-validation on the training split, using 3
# parallel jobs and a fixed seed for the sampling.
rf_random = RandomizedSearchCV(
    estimator=clf,
    param_distributions=random_grid,
    n_iter=50,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=3,
)
rf_random.fit(X_train, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  35 tasks      | elapsed:   11.3s
[Parallel(n_jobs=3)]: Done 156 tasks      | elapsed:   59.7s
[Parallel(n_jobs=3)]: Done 250 out of 250 | elapsed:  1.4min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [55]:
# Parameter combination selected by the search.
rf_random.best_params_

{'n_estimators': 20,
 'min_samples_split': 2,
 'min_samples_leaf': 2,
 'max_leaf_nodes': 17,
 'max_features': 'auto',
 'max_depth': 6,
 'bootstrap': False}

In [56]:
# Report for the tuned search object on the training split.
y_pred = rf_random.predict(X_train)
print(classification_report(y_train, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.00      0.00      0.00       515
     Country       0.38      0.01      0.01      1343
  Electronic       0.44      0.74      0.55      3373
        Folk       0.00      0.00      0.00       844
        Jazz       0.30      0.29      0.29      1837
       Latin       0.00      0.00      0.00       387
       Metal       0.61      0.58      0.60      1731
     New Age       0.00      0.00      0.00       156
         Pop       0.28      0.13      0.17      2322
        Punk       0.00      0.00      0.00       486
         Rap       0.36      0.63      0.45      1917
      Reggae       0.60      0.00      0.00      1246
         RnB       0.00      0.00      0.00      1219
        Rock       0.32      0.67      0.43      3678
       World       0.00      0.00      0.00       329

    accuracy                           0.37     21383
   macro avg       0.22      0.20      0.17     21383
weighted avg       0.32   

  _warn_prf(average, modifier, msg_start, len(result))


In [57]:
# Report for the tuned search object on the test split.
y_pred = rf_random.predict(X_test)
print(classification_report(y_test, y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.00      0.00      0.00       190
     Country       0.17      0.01      0.01       194
  Electronic       0.42      0.77      0.54       708
        Folk       0.00      0.00      0.00       136
        Jazz       0.31      0.22      0.26       461
       Latin       0.00      0.00      0.00        81
       Metal       0.58      0.62      0.60       374
     New Age       0.00      0.00      0.00        38
         Pop       0.23      0.12      0.15       463
        Punk       0.00      0.00      0.00        95
         Rap       0.32      0.63      0.42       379
      Reggae       0.00      0.00      0.00       263
         RnB       0.00      0.00      0.00       395
        Rock       0.33      0.66      0.44       827
       World       0.00      0.00      0.00        45

    accuracy                           0.37      4649
   macro avg       0.16      0.20      0.16      4649
weighted avg       0.26   

  _warn_prf(average, modifier, msg_start, len(result))


In [58]:
# Score of the tuned search object on the test split.
print(rf_random.score(X_test,y_test))

0.36846633684663366


## Results on full data

In [59]:
# Predict every row of the dataset (train, val and test together), so the
# report below includes rows used for fitting.
Y_pred = rf_random.predict(X)

# Re-encode the genre column with the fitted encoder to get matching ids.
Y_true = le.transform(df_entry["genre"].values)
print(classification_report(Y_true, Y_pred, target_names=class_names))

              precision    recall  f1-score   support

       Blues       0.00      0.00      0.00       825
     Country       0.29      0.01      0.01      1778
  Electronic       0.43      0.74      0.54      4801
        Folk       0.00      0.00      0.00      1144
        Jazz       0.30      0.28      0.29      2671
       Latin       0.00      0.00      0.00       550
       Metal       0.62      0.58      0.60      2617
     New Age       0.00      0.00      0.00       265
         Pop       0.28      0.12      0.17      3429
        Punk       0.00      0.00      0.00       713
         Rap       0.35      0.63      0.45      2674
      Reggae       0.50      0.00      0.00      1699
         RnB       0.00      0.00      0.00      1836
        Rock       0.31      0.67      0.42      5213
       World       0.00      0.00      0.00       497

    accuracy                           0.37     30712
   macro avg       0.21      0.20      0.17     30712
weighted avg       0.31   

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

       Blues       0.93      0.63      0.75       825
     Country       0.83      0.84      0.83      1778
  Electronic       0.81      0.91      0.86      4801
        Folk       0.88      0.78      0.82      1144
        Jazz       0.80      0.81      0.80      2671
       Latin       0.99      0.68      0.81       550
       Metal       0.88      0.87      0.88      2617
     New Age       0.99      0.54      0.70       265
         Pop       0.81      0.77      0.79      3429
        Punk       0.98      0.66      0.79       713
         Rap       0.77      0.89      0.83      2674
      Reggae       0.88      0.78      0.83      1699
         RnB       0.88      0.71      0.78      1836
        Rock       0.76      0.87      0.81      5213
       World       1.00      0.62      0.77       497

    accuracy                           0.82     30712
   macro avg       0.88      0.76      0.80     30712
weighted avg       0.83   

## Exporting

In [93]:
# Class probabilities for every row, one column per genre.
# NOTE(review): `clf` is whichever model was bound last. The execution counts
# suggest the AdaBoost cells were re-run just before this section; run top to
# bottom, `clf` is the default random forest instead. Confirm which model is
# meant to be exported.
Y_proba = clf.predict_proba(X)
print(Y_proba.shape)

# Attach the probability columns to the mapping table (joined on the index).
df_proba = pd.DataFrame(data = Y_proba, columns = class_names)
df_final = df_entry.join(df_proba)

(30712, 15)


In [94]:
# Store the predicted genre name next to the probability columns.
# The labels are taken from the same `clf` that produced the probabilities, so
# both always describe one model regardless of which cell last assigned Y_pred.
# Nothing is rebound in place, so the cell can be re-run: the former
# `Y_pred = le.inverse_transform(Y_pred)` failed on a second run because
# Y_pred then already held genre names instead of class ids.
df_final["mfcc_genre"] = le.inverse_transform(clf.predict(X))

In [95]:
# Remove the columns that are not part of the exported results.
# Rebinding with errors="ignore" (instead of an in-place drop) keeps the cell
# re-runnable: the in-place version raised KeyError on a second run because
# the columns had already been removed.
df_final = df_final.drop(
    columns=['album_index', 'msd_artist_id', 'image_url', 'img', 'mfcc',
             'deep_features'],
    errors="ignore",
)

In [96]:
# Preview the first rows of the table that will be exported.
df_final.head()

Unnamed: 0,msd_track_id,genre,set,Blues,Country,Electronic,Folk,Jazz,Latin,Metal,New Age,Pop,Punk,Rap,Reggae,RnB,Rock,World,mfcc_genre
0,TRABKJU128F422A7FE,Metal,train,0.071384,0.077559,0.071399,0.06529,0.056474,0.057201,0.107543,0.006534,0.075655,0.084393,0.060039,0.060116,0.057414,0.092916,0.056082,Metal
1,TRBLDQQ128F92E58B4,Rock,train,0.06612,0.074482,0.071812,0.073876,0.073638,0.065633,0.058644,0.060485,0.0735,0.052563,0.063016,0.056861,0.067499,0.074642,0.067228,Rock
2,TRDMMDE128F14A9052,Rock,train,0.063467,0.067342,0.076728,0.06332,0.066982,0.058834,0.085148,0.051309,0.071269,0.066633,0.061744,0.061995,0.062827,0.082274,0.060127,Metal
3,TRJOPZB128F4250E02,Rock,train,0.067432,0.070425,0.075837,0.062649,0.066678,0.06111,0.069101,0.048971,0.069878,0.066251,0.067812,0.067267,0.064801,0.079591,0.062198,Rock
4,TRJKBVL128F935567B,Rock,train,0.069807,0.069346,0.077877,0.062047,0.064908,0.057085,0.073004,0.055751,0.067003,0.069341,0.065162,0.065139,0.06455,0.078639,0.060341,Rock


In [97]:
# Write the results table with a header row and without the row index.
# to_csv returns None when given a path, so its result is not assigned.
# NOTE(review): the file name says "adaboost"; make sure the model bound to
# `clf` in the export section is the AdaBoost one before writing.
df_final.to_csv(r'../data/mfcc_results_adaboost.csv', index=False, header=True)