# Music Genre Classification
An music classifier using multiple machine learning methods

**Neural Networks** • **SVMs** • **Logistic Regression**


---
Authors 
- David Chang
- Denizhan Ene
---
Credits
- Dataset: [GTZAN Dataset](https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification)
- sklearn: [Docs](https://scikit-learn.org/0.21/documentation.html)
---

For local installations:
```Shell
pip install kaggle
kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification
```

# Setup

In this section, we set up the dataset by establishing a directory where it is and where the information will be contained in. The CSV is stored in a shared Google Drive directory that could be accessed by mounting the drive.

We utilize many libraries in the project so this is the place we put all the imports as well.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|
# |                           |
# |          Imports          |
# |                           |
# |___________________________|

import numpy as np
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [7]:
# |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|
# |                           |
# |         null check        |
# |                           |
# |___________________________|

# check if any values need filling

#read feature data from csv file
#hosted in google drive
df = pd.read_csv("/content/drive/MyDrive/ML_genre_classification_nn_project/Data/features_30_sec.csv")
df2 = pd.read_csv("/content/drive/MyDrive/ML_genre_classification_nn_project/Data/features_3_sec.csv")
print(df.shape)
print(df2.shape)
# print(df.head())
# print(df2.head())

#df.isnull().any()

# None

(1000, 60)
(9990, 60)


In [8]:
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [9]:
df2.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


Included in the dataset are two csv files, one with 1000 samples that contain 30 seconds of data. The other one has ~ 9990 samples but only with 3 seconds of data.

We're planning running the model on both.

In [11]:
# |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|
# |                           |
# |       sanity check        |
# |                           |
# |___________________________|

data = df.to_numpy()
#initalize and set up the column vector and the feature matrix
song_names = []
labels = []
features = []
# for i in range(len(data)):
#   song_names.append(data[i][0])
#   labels.append(data[i][-1])

labels = df.pop('filename')


for list in data:
  list = list[1:-1]
  features.append(list)

arr = np.array(features)
print(song_names)
print(labels) # 100 blues, 100 classical, 100 country etc etc
# print(arr)
# print(features)

KeyError: ignored

In [5]:
label_index = dict()
for i, j in enumerate(df.label.unique()):
  label_index[j] = i

print(label_index)

{'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}


In [6]:
# df_shuffle = shuffle(df)
# df_shuffle.head()

X = df
y = X.pop('label').to_numpy()

# encode the labels
y = np.array([label_index[x] for x in y]).reshape((y.shape[0],1))

# print(X.shape)
# print(y.shape)


# |‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾|
# |                           |
# |           TODO!           |
# |                           |
# |___________________________|
# implement normalization?

# split the data 80% training, 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

print(f'Training Examples shape:\t{X_train.shape}')
print(f'Testing Examples shape:\t\t{X_test.shape}')
print(f'Training Labels shape:\t\t{y_train.shape}')
print(f'Testing Labels shape:\t\t{y_test.shape}')

Training Examples shape:	(800, 58)
Testing Examples shape:		(200, 58)
Training Labels shape:		(800, 1)
Testing Labels shape:		(200, 1)


In [73]:
hidden_layers = (20,20,20,20,20,20)
mlp = MLPClassifier(hidden_layer_sizes=hidden_layers, activation='relu', solver='adam', max_iter=5000, )
mlp.fit(X_train, y_train)

predict_train = mlp.predict(X_train)
predict_test = mlp.predict(X_test)

  y = column_or_1d(y, warn=True)


In [74]:
print(confusion_matrix(y_train, predict_train))
print(classification_report(y_train, predict_train))

[[ 1  0  0  0  1  0  5 73  0  0]
 [ 0 41  1  0  0  0  1 43  1  0]
 [ 0  0  0  0  0  0  0 73  0  0]
 [ 0  0  0  0  0  0  0 79  0  0]
 [ 0  0  0  0  1  0  0 81  0  3]
 [ 0  1  1  0  0  7  0 69  0  0]
 [ 0  0  0  0  2  1 17 54  0  1]
 [ 0  0  0  0  0  0  0 87  0  0]
 [ 0  0  0  0  0  0  2 75  0  0]
 [ 0  0  0  0  0  0  0 77  0  2]]
              precision    recall  f1-score   support

           0       1.00      0.01      0.02        80
           1       0.98      0.47      0.64        87
           2       0.00      0.00      0.00        73
           3       0.00      0.00      0.00        79
           4       0.25      0.01      0.02        85
           5       0.88      0.09      0.16        78
           6       0.68      0.23      0.34        75
           7       0.12      1.00      0.22        87
           8       0.00      0.00      0.00        77
           9       0.33      0.03      0.05        79

    accuracy                           0.20       800
   macro avg       

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
