In [0]:
import io
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from google.colab import files
uploaded = files.upload()

Saving song_data.csv to song_data.csv


In [0]:
#Read in dataset
songs = pd.read_csv(io.BytesIO(uploaded['song_data.csv']))

In [0]:
# Split data into train/test
X = songs.drop('Genre', axis = 1)
y = songs['Genre']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

# Scale and transform data
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Hyperparameter tuning with GridSearchCV


In [0]:
grid_param = {'C': [0.01, 0.1, 1, 10, 100], 'gamma': [0.1, 0.01, 0.001], 'kernel': ['rbf']}
grid = GridSearchCV(SVC(), grid_param, refit = True, verbose = 2, n_jobs = -1, cv = 5)
grid.fit(X_train, y_train)

y_pred = grid.predict(X_test)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

Fitting 5 folds for each of 15 candidates, totalling 75 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   38.8s
[Parallel(n_jobs=-1)]: Done  75 out of  75 | elapsed:  1.3min finished


[[55 21 12 24  4  2 16  5]
 [20 58 18 10  3 15  7 12]
 [ 2 13 81  5  3 15 12  4]
 [34 14  1 74  0 10 12 11]
 [ 2 29 10  2 15  6 11  1]
 [13 17 11 15  0 71 14 15]
 [11 27 18 12  2 22 21 15]
 [ 7  4  5  9  1  7 13 89]]
               precision    recall  f1-score   support

   Electronic       0.38      0.40      0.39       139
 Experimental       0.32      0.41      0.36       143
         Folk       0.52      0.60      0.56       135
      Hip-Hop       0.49      0.47      0.48       156
 Instrumental       0.54      0.20      0.29        76
International       0.48      0.46      0.47       156
          Pop       0.20      0.16      0.18       128
         Rock       0.59      0.66      0.62       135

     accuracy                           0.43      1068
    macro avg       0.44      0.42      0.42      1068
 weighted avg       0.44      0.43      0.43      1068



## Implementing the default SVC using a linear kernel



In [0]:
vanilla_clf = svm.SVC(kernel = 'linear', verbose = True)
vanilla_clf.fit(X_train, y_train)

y_pred = vanilla_clf.predict(X_test)

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[71  5  3 27  4 12  4  5]
 [21 41 26 12  4 23  6 25]
 [ 4 16 94  0  1 11  7 16]
 [35  2  3 68  0  5  6 11]
 [ 5 14 13  1  9  4  6  5]
 [23 32 12 14  0 78 10 13]
 [26 15 14 22  2 28 13 17]
 [ 7  7  6 12  3  7 11 71]]
               precision    recall  f1-score   support

   Electronic       0.37      0.54      0.44       131
 Experimental       0.31      0.26      0.28       158
         Folk       0.55      0.63      0.59       149
      Hip-Hop       0.44      0.52      0.48       130
 Instrumental       0.39      0.16      0.23        57
International       0.46      0.43      0.45       182
          Pop       0.21      0.09      0.13       137
         Rock       0.44      0.57      0.49       124

     accuracy                           0.42      1068
    macro avg       0.40      0.40      0.39      1068
 weighted avg       0.40      0.42      0.40      1068



In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

## Building a random forest 

In [0]:
clf = RandomForestClassifier(min_samples_leaf = 3, max_depth = 10)
clf.fit(X_train, y_train)
print(clf.feature_importances_)

[0.0353968  0.04573808 0.05375771 0.05470309 0.07544742 0.05379582
 0.06046184 0.04444786 0.03630491 0.02826416 0.03341883 0.0430472
 0.03465819 0.02799679 0.03974074 0.03675005 0.04029693 0.0442393
 0.04250856 0.04210929 0.03647744 0.03183022 0.02888406 0.02972469]


In [0]:
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[45  6 12 36  6 10  9 10]
 [13 42 27 17 10 22  4 19]
 [ 1  6 92  6  2 24  4  7]
 [30  2  4 85  1  8  4 12]
 [ 2 11 24  3  7  8  4  5]
 [ 9 12 18 20  0 84  9 14]
 [12  9 14 25  5 30 16 23]
 [ 4  3 16 12  3 14  4 72]]
               precision    recall  f1-score   support

   Electronic       0.39      0.34      0.36       134
 Experimental       0.46      0.27      0.34       154
         Folk       0.44      0.65      0.53       142
      Hip-Hop       0.42      0.58      0.49       146
 Instrumental       0.21      0.11      0.14        64
International       0.42      0.51      0.46       166
          Pop       0.30      0.12      0.17       134
         Rock       0.44      0.56      0.50       128

     accuracy                           0.41      1068
    macro avg       0.38      0.39      0.37      1068
 weighted avg       0.40      0.41      0.39      1068



## Building a deep learning network