# Music Genre Classification

A personal project to improve my TensorFlow/Keras knowledge while learning about audio feature extraction.

## Table of Contents
1. [Dataset Download](#2.-Dataset-Download)

In [18]:
import os
import numpy as np
from tensorflow import keras
import librosa
import matplotlib.pyplot as plt

## 2. Audio feature extraction
- Mel Frequency Cepstral Coefficients (MFCC)
- Mel Spectrogram
- Chroma Vector
- Tonal Centroid Features (Tonnetz)

### Mel Frequency Cepstral Coefficients (MFCC)

MFCCs (Mel-Frequency Cepstral Coefficients) of an audio signal are a small set of features which describe the overall shape of the spectral envelope. They are frequently used for voice recognition.

In [19]:
def get_mfcc(path):
    """Compute the MFCCs of the opening 30 seconds of an audio file.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        numpy.ndarray: The output of ``librosa.feature.mfcc`` (called with
        only the signal and its sample rate) wrapped in ``np.array``.
    """
    # Only the first 30 seconds (offset 0) of the file are decoded.
    signal, sample_rate = librosa.load(path, offset=0, duration=30)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate)
    return np.array(coefficients)

### Mel Spectrogram

A mel spectrogram is a spectrogram where the frequencies are converted to the mel scale. It is obtained by applying a frequency-domain filter bank to audio signals that are windowed in time.

In [20]:
def get_mel_spectrogram(path):
    """Compute the mel spectrogram of the opening 30 seconds of an audio file.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        numpy.ndarray: The output of ``librosa.feature.melspectrogram``
        (called with only the signal and its sample rate) wrapped in
        ``np.array``.
    """
    # Only the first 30 seconds (offset 0) of the file are decoded.
    signal, sample_rate = librosa.load(path, offset=0, duration=30)
    spectrogram = librosa.feature.melspectrogram(y=signal, sr=sample_rate)
    return np.array(spectrogram)

### Chroma vector

Chroma-based features, also referred to as "pitch class profiles", represent the tonal content of a musical audio signal in a condensed form. They are useful for chord recognition or harmonic similarity estimation.

In [21]:
def get_chroma_vector(path):
    """Compute the chroma (STFT-based) features of the first 30 seconds of a file.

    The clip is loaded with the same ``offset=0, duration=30`` window that
    ``get_mfcc`` and ``get_mel_spectrogram`` use, so that every feature family
    concatenated by ``get_features`` describes the same span of audio.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        numpy.ndarray: The output of ``librosa.feature.chroma_stft`` (called
        with only the signal and its sample rate) wrapped in ``np.array``.
    """
    y, sr = librosa.load(path, offset=0, duration=30)
    chroma_vector = np.array(librosa.feature.chroma_stft(y=y, sr=sr))
    return chroma_vector

### Tonal Centroid Features (Tonnetz)

Tonnetz (German for "tone network") is a pictorial representation of chroma features projected onto a 6-dimensional basis representing the perfect fifth, minor third, and major third, revealing affinities and structures between notes and within concrete pieces of music.

In [22]:
def get_tonnetz(path):
    """Compute the tonal centroid (tonnetz) features of the first 30 seconds of a file.

    The clip is loaded with the same ``offset=0, duration=30`` window that
    ``get_mfcc`` and ``get_mel_spectrogram`` use, so that every feature family
    concatenated by ``get_features`` describes the same span of audio.

    Args:
        path: Location of the audio file, passed straight to ``librosa.load``.

    Returns:
        numpy.ndarray: The output of ``librosa.feature.tonnetz`` (called with
        only the signal and its sample rate) wrapped in ``np.array``.
    """
    y, sr = librosa.load(path, offset=0, duration=30)
    tonnetz = np.array(librosa.feature.tonnetz(y=y, sr=sr))
    return tonnetz

## 2.1 Features calculation

In [23]:
def _summarize_rows(matrix):
    """Reduce axis 1 of a 2-D array to its per-row means, minima and maxima, concatenated."""
    return np.concatenate((matrix.mean(axis=1),
                           matrix.min(axis=1),
                           matrix.max(axis=1)))


def get_features(path):
    """Build one flat feature vector describing the audio file at ``path``.

    Each of the four extractors (MFCC, mel spectrogram, chroma, tonnetz) is
    run on the file and its result is summarised along axis 1 by mean,
    minimum and maximum. The four summaries are then joined in the order
    chroma, mel spectrogram, MFCC, tonnetz.

    Args:
        path: Location of the audio file, forwarded to every extractor.

    Returns:
        numpy.ndarray: 1-D array holding the concatenated summaries.
    """
    # Extractors are invoked in the same sequence as before:
    # MFCC, mel spectrogram, chroma, tonnetz.
    mfcc_stats = _summarize_rows(get_mfcc(path))
    mel_stats = _summarize_rows(get_mel_spectrogram(path))
    chroma_stats = _summarize_rows(get_chroma_vector(path))
    tonnetz_stats = _summarize_rows(get_tonnetz(path))

    # The output layout differs from the extraction order on purpose.
    return np.concatenate((chroma_stats, mel_stats, mfcc_stats, tonnetz_stats))

## Prepare Dataset

### Calculate features

In [27]:
# Root folder of the dataset; it is expected to contain one sub-folder per genre.
directory = 'datasets/gtzan_dataset/genres_original'
genres = ['blues', 'classical', 'country', 'disco', 'hiphop',
          'jazz', 'metal', 'pop', 'reggae', 'rock']

# Fail early with an actionable message instead of erroring inside the loop.
if not os.path.isdir(directory):
    raise FileNotFoundError(
        f"Dataset folder not found: {directory!r}. "
        "Download the dataset and place the genre folders there, "
        "or update `directory`."
    )

features = []
labels = []

# The integer label of a sample is the position of its genre in `genres`.
for label, genre in enumerate(genres):
    print(f"Calculating features for: {genre}")
    genre_dir = os.path.join(directory, genre)
    # Sort the listing so the sample order does not depend on the filesystem.
    for file in sorted(os.listdir(genre_dir)):
        file_path = os.path.join(genre_dir, file)

        features.append(get_features(file_path))
        labels.append(label)

Calculating features for: blues


FileNotFoundError: [WinError 3] El sistema no puede encontrar la ruta especificada: 'datasets/gtzan_dataset/genres_original/blues'

### Split into training, validation and testing

In [10]:
# Fixed seed so the shuffle, and therefore the split, is reproducible across runs.
SPLIT_SEED = 42
# Split boundaries: [0, TRAIN_END) train, [TRAIN_END, VAL_END) validation, the rest test.
TRAIN_END = 600
VAL_END = 799

features = np.array(features)
labels = np.array(labels)

if len(features) != len(labels):
    raise ValueError(
        f"features ({len(features)}) and labels ({len(labels)}) differ in length"
    )
if len(features) <= VAL_END:
    raise ValueError(
        f"need more than {VAL_END} samples to fill all three splits, got {len(features)}"
    )

# Size the permutation from the data rather than a hard-coded 999, which
# dropped samples when more were loaded and raised IndexError when fewer were.
permutations = np.random.default_rng(SPLIT_SEED).permutation(len(features))
features = features[permutations]
labels = labels[permutations]

features_train = features[:TRAIN_END]
labels_train = labels[:TRAIN_END]

features_val = features[TRAIN_END:VAL_END]
labels_val = labels[TRAIN_END:VAL_END]

# Open-ended slice: every remaining sample goes to the test split.
features_test = features[VAL_END:]
labels_test = labels[VAL_END:]

## 3. Training the model

### Tensorflow/Keras basic architecture

In [11]:
def create_model(input_dim=498, num_classes=10):
    """Build and compile a small fully connected classifier.

    The network is two ReLU hidden layers (300 and 200 units) followed by a
    softmax output layer. It is compiled with the Adam optimizer, sparse
    categorical cross-entropy loss (integer class labels) and accuracy as
    the reported metric.

    Args:
        input_dim: Length of each input feature vector. Defaults to 498, the
            value previously hard-coded in this function.
        num_classes: Number of output classes. Defaults to 10, the value
            previously hard-coded in this function.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    model = keras.models.Sequential([
        keras.layers.Dense(300, activation='relu', input_shape=(input_dim,)),
        keras.layers.Dense(200, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [12]:
model = create_model()

# Pass the NumPy arrays straight to Keras: converting them to nested Python
# lists first only adds a costly round-trip before Keras converts them back.
history = model.fit(x=features_train,
                    y=labels_train,
                    verbose=1,
                    validation_data=(features_val, labels_val),
                    epochs=64)


Epoch 1/64


Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64
Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64
Epoch 12/64
Epoch 13/64
Epoch 14/64
Epoch 15/64
Epoch 16/64
Epoch 17/64
Epoch 18/64
Epoch 19/64
Epoch 20/64
Epoch 21/64
Epoch 22/64
Epoch 23/64
Epoch 24/64
Epoch 25/64
Epoch 26/64
Epoch 27/64
Epoch 28/64
Epoch 29/64
Epoch 30/64
Epoch 31/64
Epoch 32/64
Epoch 33/64
Epoch 34/64
Epoch 35/64
Epoch 36/64
Epoch 37/64
Epoch 38/64
Epoch 39/64
Epoch 40/64
Epoch 41/64
Epoch 42/64
Epoch 43/64
Epoch 44/64
Epoch 45/64
Epoch 46/64
Epoch 47/64
Epoch 48/64
Epoch 49/64
Epoch 50/64
Epoch 51/64
Epoch 52/64
Epoch 53/64
Epoch 54/64
Epoch 55/64
Epoch 56/64
Epoch 57/64
Epoch 58/64
Epoch 59/64
Epoch 60/64
Epoch 61/64
Epoch 62/64
Epoch 63/64
Epoch 64/64


In [13]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 300)               149700    
                                                                 
 dense_1 (Dense)             (None, 200)               60200     
                                                                 
 dense_2 (Dense)             (None, 10)                2010      
                                                                 
Total params: 211910 (827.77 KB)
Trainable params: 211910 (827.77 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Plot training vs. validation accuracy and loss, one figure per metric.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Number epochs from 1 so the x-axis matches the "Epoch k/N" lines Keras logs.
epochs = range(1, len(acc) + 1)

# Explicit figure/axes objects avoid relying on pyplot's implicit "current figure".
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'r', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'r', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation Loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()

In [14]:
# Evaluate on the held-out test split. The NumPy arrays are passed directly
# (no list round-trip). With the model compiled with a single metric,
# score[0] is the loss and score[1] the accuracy.
score = model.evaluate(x=features_test, y=labels_test, verbose=0)
print(f'Accuracy : {score[1]*100}%')

Accuracy : 51.499998569488525%
