<a href="https://colab.research.google.com/github/aozturk/data-focus/blob/master/keras_music_genre_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Download to use GTZAN genre collection dataset for classification. 

!wget "http://opihi.cs.uvic.ca/sound/genres.tar.gz" 

--2019-05-15 22:09:32--  http://opihi.cs.uvic.ca/sound/genres.tar.gz
Resolving opihi.cs.uvic.ca (opihi.cs.uvic.ca)... 142.104.68.135
Connecting to opihi.cs.uvic.ca (opihi.cs.uvic.ca)|142.104.68.135|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1225573614 (1.1G) [application/x-gzip]
Saving to: ‘genres.tar.gz’


2019-05-15 22:11:15 (11.4 MB/s) - ‘genres.tar.gz’ saved [1225573614/1225573614]



In [0]:
!tar -xzf genres.tar.gz

In [3]:
# feature extractoring and preprocessing data
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [4]:
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
# The dataset consists of 10 genres
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        # Extracting the Spectrogram for every Audio
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

<Figure size 720x720 with 0 Axes>

In [0]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [0]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
# Extracting features from Spectrogram
for g in genres:
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        rmse = librosa.feature.rmse(y=y)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr) # Chroma Frequencies
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr) # Spectral Centroid
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr) # Spectral Bandwidth
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr) # Spectral Roll-off
        zcr = librosa.feature.zero_crossing_rate(y) # Zero Crossing Rate
        mfcc = librosa.feature.mfcc(y=y, sr=sr) # Mel-frequency cepstral coefficients (MFCC)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        # Write the data to a csv file
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [7]:
# Analysing the Data in Pandas
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00035.au,0.28833,0.173407,1506.979392,2096.637631,3404.796078,0.04543,-171.786064,120.280233,15.336581,21.097281,-5.410021,24.192475,-24.140309,18.545325,-22.352987,1.30264,-20.761293,-4.984545,-17.119595,-7.898076,-15.321158,-9.350151,-8.90613,-8.516965,-14.330011,-19.935203,blues
1,blues.00021.au,0.321085,0.101159,1046.74884,1480.933764,1830.788929,0.047937,-264.698399,138.855977,11.186486,48.520342,14.915178,20.572826,3.207815,1.247171,-15.846408,15.986909,3.368814,-1.520129,1.894727,6.325552,-0.986607,-2.41286,-3.683063,0.496945,5.046816,-0.780009,blues
2,blues.00081.au,0.38026,0.248262,2116.942959,1956.611056,4196.10796,0.127272,-26.929785,107.334008,-46.809993,40.932634,-21.463776,24.816275,-18.945571,15.253459,-15.050104,12.262823,-15.23405,14.336612,-13.821769,7.562789,-6.181372,0.330165,-6.829571,0.965922,-7.570825,2.918987,blues
3,blues.00061.au,0.451905,0.141766,2504.026852,2180.438691,5077.152632,0.167584,-82.454706,94.147763,-27.120917,75.6551,-27.509951,35.576213,-20.278538,25.109201,-15.36231,11.329742,-13.398637,8.139763,-8.494857,8.283361,-10.346549,-3.462061,-5.223508,-2.595848,-8.363733,-6.978243,blues
4,blues.00099.au,0.370279,0.105061,1538.817338,1703.507382,3175.272466,0.074799,-177.165864,136.221594,-26.915181,36.37629,-18.760828,20.909633,-1.778092,3.013012,-5.600873,5.638468,-8.117091,5.642528,-10.790228,-2.416264,-6.943611,-2.507238,-5.203642,-3.407661,-3.246502,-2.681821,blues


In [8]:
data.shape

(1000, 28)

In [0]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [0]:
# Encoding the Labels
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [0]:
# Scaling the Feature columns
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [0]:
# Dividing data into training and Testing set¶
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [13]:
len(y_train)
len(y_test)
X_train[10]

array([-1.37033782, -1.30169706, -1.16063004, -1.52405843, -1.27174291,
       -0.57566194, -1.33485044,  1.54346624, -1.0448233 , -0.95610604,
       -0.69073897, -1.76404548, -0.85322922, -1.03110979, -0.03355505,
       -0.84523657, -0.84122547, -0.80974017,  0.41753526, -0.47049311,
        1.17927252,  0.87434598,  1.45060155,  0.08136082,  0.18161071,
        0.25832182])

In [14]:
# Building the deep learning network

from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [0]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [16]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [17]:
test_loss, test_acc = model.evaluate(X_test,y_test)
print('test_acc: ',test_acc)

test_acc:  0.65


In [0]:
# Validate the approach
x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

In [19]:
model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=30,
          batch_size=512,
          validation_data=(x_val, y_val))
results = model.evaluate(X_test, y_test)
print(results)

Train on 600 samples, validate on 200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
[1.0745003938674926, 0.615]


In [0]:
# Predict on the test data
predictions = model.predict(X_test)

In [21]:
predictions[0].shape

(10,)

In [22]:
np.sum(predictions[0])

0.99999994

In [23]:
np.argmax(predictions[0])

1