### ML genre classification on FMA with a PCA feature selection step

* use the 'medium' dataset to work with more training examplars
* note medium also only contains tracks where all tagged genres roll up to the same root genre
* 1st attempt with PCA - drop less impactful components
* 2nd attempt - drop highly correlated features

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import scipy as sp
import IPython.display as ipd

import matplotlib.pyplot as plt

from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import (train_test_split, GridSearchCV, RandomizedSearchCV)
from sklearn.metrics import (classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingRandomSearchCV, HalvingGridSearchCV

import utils

RANDOM_STATE = 53

In [2]:
(features, tracks) = utils.load_features()
features.shape, tracks.shape

((106574, 518), (106574, 52))

In [3]:
small = tracks[('set', 'subset')] == 'small'
medium = tracks[('set', 'subset')].isin(['small','medium'])
X = features[medium]
y = tracks[medium][('track','genre_top')]

print(X.shape, y.shape)

(25000, 518) (25000,)


#### arbitrarily eliminate bottom 3 genres due to lack of exemplars


In [4]:
prune = ~y.isin(['Spoken','Blues','Easy Listening'])
y = y[prune]
X = X[prune]

#### build training/test sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.2,
                                                    random_state=RANDOM_STATE,
                                                    shuffle=True,
                                                    stratify=y)
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### run PCA and save the components which explain 95% variance

In [6]:
# fit PCA to training set; retain all components
pca = PCA(random_state=RANDOM_STATE)
pca.fit(X_train_scaled)

# locate the component index that gives 95% variance
cum_variance = np.cumsum(pca.explained_variance_ratio_)
for i in range(len(cum_variance)):
    if cum_variance[i] > .95:
        break

print(f'95% explained variance level: {i}')

# now transform the train/test sets
pca = PCA(random_state=RANDOM_STATE, n_components = i)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

X_train_pca.shape, X_test_pca.shape

95% explained variance level: 145


((19829, 145), (4958, 145))

#### 4 Basic ML classifiers with classifiers with PCA reduced features

In [7]:
classifiers = {'SVC': SVC(kernel='linear', random_state=RANDOM_STATE),
               'SVC-RBF': SVC(kernel='rbf', random_state=RANDOM_STATE),
               'LR' : LogisticRegression(random_state=RANDOM_STATE),
               'KNN' :KNeighborsClassifier()
              }

for (name, cl) in classifiers.items():
    cl.fit(X_train_pca, y_train)
    y_pred = cl.predict(X_test_pca)
    print(classification_report(y_test, y_pred))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                     precision    recall  f1-score   support

          Classical       0.73      0.82      0.78       124
            Country       1.00      0.06      0.11        35
         Electronic       0.64      0.81      0.71      1263
       Experimental       0.46      0.34      0.39       450
               Folk       0.65      0.61      0.63       304
            Hip-Hop       0.70      0.62      0.66       440
       Instrumental       0.56      0.43      0.48       270
      International       0.58      0.41      0.48       204
               Jazz       0.62      0.30      0.40        77
Old-Time / Historic       0.98      0.94      0.96       102
                Pop       0.35      0.05      0.08       237
               Rock       0.73      0.87      0.80      1421
           Soul-RnB       0.00      0.00      0.00        31

           accuracy                           0.67      4958
          macro avg       0.62      0.48      0.50      4958
       weighted avg   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                     precision    recall  f1-score   support

          Classical       0.79      0.81      0.80       124
            Country       1.00      0.03      0.06        35
         Electronic       0.66      0.85      0.74      1263
       Experimental       0.52      0.46      0.49       450
               Folk       0.67      0.65      0.66       304
            Hip-Hop       0.74      0.62      0.68       440
       Instrumental       0.59      0.41      0.49       270
      International       0.71      0.44      0.55       204
               Jazz       0.79      0.29      0.42        77
Old-Time / Historic       1.00      0.97      0.99       102
                Pop       0.43      0.06      0.11       237
               Rock       0.75      0.88      0.81      1421
           Soul-RnB       0.00      0.00      0.00        31

           accuracy                           0.69      4958
          macro avg       0.67      0.50      0.52      4958
       weighted avg   

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


                     precision    recall  f1-score   support

          Classical       0.76      0.83      0.79       124
            Country       0.00      0.00      0.00        35
         Electronic       0.64      0.78      0.71      1263
       Experimental       0.46      0.32      0.38       450
               Folk       0.61      0.61      0.61       304
            Hip-Hop       0.67      0.63      0.65       440
       Instrumental       0.50      0.40      0.45       270
      International       0.50      0.35      0.41       204
               Jazz       0.69      0.31      0.43        77
Old-Time / Historic       0.95      0.96      0.96       102
                Pop       0.33      0.05      0.09       237
               Rock       0.72      0.88      0.79      1421
           Soul-RnB       0.67      0.06      0.12        31

           accuracy                           0.66      4958
          macro avg       0.58      0.48      0.49      4958
       weighted avg   

In [8]:
classifiers = {'SVC': SVC(kernel='linear', class_weight='balanced', random_state=RANDOM_STATE),
               'SVC-RBF': SVC(kernel='rbf', class_weight='balanced',random_state=RANDOM_STATE),
               'LR' : LogisticRegression(class_weight='balanced', max_iter=5000, random_state=RANDOM_STATE),
              }

for (name, cl) in classifiers.items():
    cl.fit(X_train_pca, y_train)
    y_pred = cl.predict(X_test_pca)
    print(classification_report(y_test, y_pred))

                     precision    recall  f1-score   support

          Classical       0.65      0.84      0.73       124
            Country       0.11      0.57      0.19        35
         Electronic       0.75      0.53      0.62      1263
       Experimental       0.39      0.40      0.40       450
               Folk       0.57      0.63      0.60       304
            Hip-Hop       0.56      0.71      0.63       440
       Instrumental       0.39      0.56      0.46       270
      International       0.40      0.49      0.44       204
               Jazz       0.28      0.61      0.39        77
Old-Time / Historic       0.94      0.93      0.94       102
                Pop       0.20      0.27      0.23       237
               Rock       0.89      0.64      0.75      1421
           Soul-RnB       0.13      0.52      0.21        31

           accuracy                           0.58      4958
          macro avg       0.48      0.59      0.51      4958
       weighted avg   

In [9]:
classifiers = {'SVC': SVC(kernel='linear', class_weight='balanced', random_state=RANDOM_STATE),
               'SVC-RBF': SVC(kernel='rbf', class_weight='balanced', random_state=RANDOM_STATE),
               'LR' : LogisticRegression(class_weight='balanced', max_iter=5000, random_state=RANDOM_STATE),
              }

for (name, cl) in classifiers.items():
    cl.fit(X_train_pca, y_train)
    y_pred = cl.predict(X_test_pca)
    print(classification_report(y_test, y_pred))

                     precision    recall  f1-score   support

          Classical       0.65      0.84      0.73       124
            Country       0.11      0.57      0.19        35
         Electronic       0.75      0.53      0.62      1263
       Experimental       0.39      0.40      0.40       450
               Folk       0.57      0.63      0.60       304
            Hip-Hop       0.56      0.71      0.63       440
       Instrumental       0.39      0.56      0.46       270
      International       0.40      0.49      0.44       204
               Jazz       0.28      0.61      0.39        77
Old-Time / Historic       0.94      0.93      0.94       102
                Pop       0.20      0.27      0.23       237
               Rock       0.89      0.64      0.75      1421
           Soul-RnB       0.13      0.52      0.21        31

           accuracy                           0.58      4958
          macro avg       0.48      0.59      0.51      4958
       weighted avg   

In [10]:
param_dist = {'C': sp.stats.loguniform(1e-1, 1e1), 
              'gamma': sp.stats.loguniform(1e-4, 1e0)
}

rsh = HalvingRandomSearchCV(SVC(kernel='rbf',
                            class_weight='balanced'), 
                            param_dist, 
                            scoring='f1_macro', 
                            random_state=RANDOM_STATE,
                            n_jobs=4 )
rsh.fit(X_train_pca, y_train)
cl = rsh.best_estimator_
print(cl)

y_pred = cl.predict(X_test_pca)
print(classification_report(y_test, y_pred))

SVC(C=2.8372189729803776, class_weight='balanced', gamma=0.2718658988575616)
                     precision    recall  f1-score   support

          Classical       0.79      0.85      0.82       124
            Country       0.56      0.54      0.55        35
         Electronic       0.75      0.71      0.73      1263
       Experimental       0.48      0.57      0.52       450
               Folk       0.62      0.71      0.66       304
            Hip-Hop       0.60      0.72      0.65       440
       Instrumental       0.49      0.54      0.51       270
      International       0.58      0.58      0.58       204
               Jazz       0.66      0.45      0.54        77
Old-Time / Historic       1.00      0.98      0.99       102
                Pop       0.26      0.27      0.26       237
               Rock       0.86      0.77      0.81      1421
           Soul-RnB       0.69      0.29      0.41        31

           accuracy                           0.68      4958
      

In [11]:
param_dist = {'max_iter': [5000],
              'C': sp.stats.loguniform(1e-1, 1e1) 
}

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

rsh = HalvingRandomSearchCV(LogisticRegression(class_weight='balanced'),
                            param_dist, 
                            scoring='f1_macro', 
                            random_state=RANDOM_STATE,
                            n_jobs=4)

rsh.fit(X_train_pca, y_train)
cl = rsh.best_estimator_
print(cl)

y_pred = cl.predict(X_test_pca)
print(classification_report(y_test, y_pred))


LogisticRegression(C=2.5693969947404893, class_weight='balanced', max_iter=5000)
                     precision    recall  f1-score   support

          Classical       0.68      0.84      0.75       124
            Country       0.10      0.66      0.18        35
         Electronic       0.78      0.45      0.57      1263
       Experimental       0.40      0.34      0.37       450
               Folk       0.58      0.60      0.59       304
            Hip-Hop       0.54      0.70      0.61       440
       Instrumental       0.36      0.48      0.41       270
      International       0.34      0.50      0.41       204
               Jazz       0.21      0.57      0.31        77
Old-Time / Historic       0.88      0.97      0.93       102
                Pop       0.19      0.24      0.21       237
               Rock       0.88      0.64      0.74      1421
           Soul-RnB       0.07      0.61      0.13        31

           accuracy                           0.54      4958
  

### PCA with 100 components

In [12]:
# now transform the train/test sets
pca = PCA(random_state=RANDOM_STATE, n_components = 100)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

X_train_pca.shape, X_test_pca.shape

((19829, 100), (4958, 100))

In [13]:
classifiers = {'SVC': SVC(kernel='linear', class_weight='balanced', random_state=RANDOM_STATE),
               'SVC-RBF': SVC(kernel='rbf', class_weight='balanced', random_state=RANDOM_STATE),
               'LR' : LogisticRegression(class_weight='balanced', max_iter=5000, random_state=RANDOM_STATE),
               'KNN' :KNeighborsClassifier()
              }

for (name, cl) in classifiers.items():
    cl.fit(X_train_pca, y_train)
    y_pred = cl.predict(X_test_pca)
    print(classification_report(y_test, y_pred))

                     precision    recall  f1-score   support

          Classical       0.63      0.86      0.73       124
            Country       0.12      0.69      0.20        35
         Electronic       0.74      0.51      0.60      1263
       Experimental       0.41      0.41      0.41       450
               Folk       0.58      0.62      0.60       304
            Hip-Hop       0.54      0.67      0.60       440
       Instrumental       0.37      0.55      0.44       270
      International       0.35      0.49      0.40       204
               Jazz       0.26      0.52      0.35        77
Old-Time / Historic       0.92      0.95      0.94       102
                Pop       0.21      0.27      0.24       237
               Rock       0.90      0.63      0.74      1421
           Soul-RnB       0.10      0.55      0.18        31

           accuracy                           0.56      4958
          macro avg       0.47      0.59      0.49      4958
       weighted avg   

In [14]:
param_dist = {'C': sp.stats.loguniform(1e-1, 1e1), 
              'gamma': sp.stats.loguniform(1e-4, 1e0)
}

rsh = HalvingRandomSearchCV(SVC(kernel='rbf',
                            class_weight='balanced'), 
                            param_dist, 
                            scoring='f1_macro', 
                            random_state=RANDOM_STATE,
                            n_jobs=4 )
rsh.fit(X_train_pca, y_train)
cl = rsh.best_estimator_
print(cl)

y_pred = cl.predict(X_test_pca)
print(classification_report(y_test, y_pred))

SVC(C=2.8372189729803776, class_weight='balanced', gamma=0.2718658988575616)
                     precision    recall  f1-score   support

          Classical       0.80      0.85      0.82       124
            Country       0.50      0.57      0.53        35
         Electronic       0.74      0.69      0.72      1263
       Experimental       0.49      0.57      0.53       450
               Folk       0.60      0.69      0.64       304
            Hip-Hop       0.58      0.70      0.63       440
       Instrumental       0.49      0.56      0.52       270
      International       0.55      0.58      0.57       204
               Jazz       0.62      0.47      0.53        77
Old-Time / Historic       0.99      0.99      0.99       102
                Pop       0.25      0.27      0.26       237
               Rock       0.87      0.76      0.81      1421
           Soul-RnB       0.71      0.32      0.44        31

           accuracy                           0.67      4958
      

In [15]:
param_dist = {'max_iter': [5000],
              'C': sp.stats.loguniform(1e-1, 1e1) 
}

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

rsh = HalvingRandomSearchCV(LogisticRegression(class_weight='balanced'),
                            param_dist, 
                            scoring='f1_macro', 
                            random_state=RANDOM_STATE,
                            n_jobs=4)

rsh.fit(X_train_pca, y_train)
cl = rsh.best_estimator_
print(cl)

y_pred = cl.predict(X_test_pca)
print(classification_report(y_test, y_pred))

LogisticRegression(C=2.5693969947404893, class_weight='balanced', max_iter=5000)
                     precision    recall  f1-score   support

          Classical       0.64      0.81      0.71       124
            Country       0.09      0.69      0.17        35
         Electronic       0.77      0.43      0.55      1263
       Experimental       0.40      0.33      0.36       450
               Folk       0.57      0.60      0.58       304
            Hip-Hop       0.52      0.67      0.58       440
       Instrumental       0.33      0.47      0.39       270
      International       0.32      0.44      0.37       204
               Jazz       0.22      0.58      0.32        77
Old-Time / Historic       0.88      0.97      0.92       102
                Pop       0.18      0.23      0.20       237
               Rock       0.88      0.64      0.74      1421
           Soul-RnB       0.07      0.58      0.12        31

           accuracy                           0.53      4958
  