In [4]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [8]:
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = ['alternative', 'blues', 'electronic', 'folkcountry', 'funksoulrnb',
       'jazz', 'pop', 'raphiphop', 'rock']
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./dataset/{g}'):
        songname = f'./dataset/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

<Figure size 720x720 with 0 Axes>

In [9]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [12]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = ['alternative', 'blues', 'electronic', 'folkcountry', 'funksoulrnb',
       'jazz', 'pop', 'raphiphop', 'rock']
for g in genres:
    for filename in os.listdir(f'./dataset/{g}'):
        songname = f'./dataset/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        rmse = librosa.feature.rms(y=y)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [97]:
from sklearn.utils import shuffle
data=pd.read_csv('data.csv')
data=shuffle(data)

data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
196,classical.00096.wav,0.293142,0.057,1932.858343,1740.355038,3573.061728,0.11752,-203.587173,111.198334,-51.526943,...,3.836547,-3.773128,5.291516,4.111487,6.639014,2.993226,3.53975,-3.361387,-3.682384,classical
185,classical.00085.wav,0.309653,0.038889,1525.619293,1588.175467,2732.975141,0.091588,-246.189621,149.447739,-41.977005,...,-5.630424,-2.579881,-4.521075,-6.165346,-4.970548,-3.346554,-3.345275,-0.672826,0.2215,classical
470,hiphop.00070.wav,0.43106,0.278913,2501.271271,3049.735764,6028.54688,0.069265,-81.691071,82.321304,53.563557,...,1.534536,-0.723483,1.349879,-0.385386,-3.133822,-0.490844,-0.732915,-3.769838,-2.506016,hiphop
363,disco.00063.wav,0.54785,0.29392,2583.277699,2626.310838,5855.472926,0.099773,-51.752674,70.331902,-3.919614,...,17.185402,-2.770966,11.966265,-1.632752,6.927689,-2.915434,4.917283,-0.487009,6.742309,disco
546,jazz.00046.wav,0.25024,0.110548,1295.167919,1446.061103,2695.342035,0.05865,-204.188126,147.604614,-25.223412,...,1.565303,1.477296,-2.724842,-0.284619,-2.217539,1.517982,-1.05465,-1.772529,-2.46812,jazz


In [98]:
label= data.label
x=data.iloc[:,1:-1]

In [99]:
from sklearn.preprocessing import LabelEncoder
encoder=LabelEncoder()
y=encoder.fit_transform(label)
print(np.unique(y))

[0 1 2 3 4 5 6 7 8 9]


In [100]:
# from sklearn.cluster import KMeans
# import numpy as np

# kmeans = KMeans(n_clusters=10,init='k-means++', random_state=0).fit(x)
# kmeans.labels_


In [101]:
# test=pd.DataFrame()
# test['y']=y
# test['ny']=kmeans.labels_
# test.head()
# groupedtest=pd.DataFrame(test.groupby(by=['y','ny']).size().reset_index(name='count'))

In [102]:

# from sklearn.metrics.cluster import adjusted_rand_score
# print(adjusted_rand_score(test.y,test.ny))

In [103]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,y,random_state=42,test_size=0.2)

In [104]:
from sklearn.ensemble import RandomForestClassifier

clf=RandomForestClassifier()
clf.fit(xtrain,ytrain)
pred=clf.predict(xtest)

In [105]:
from sklearn import metrics as m

print(m.accuracy_score(ytest,pred))

0.555


In [106]:
from tensorflow.keras import models
from tensorflow.keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(xtrain.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))



model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


history = model.fit(xtrain,
                    ytrain,
                    epochs=20,
                    batch_size=128)


test_loss, test_acc = model.evaluate(xtest,ytest)

print('test_acc: ',test_acc)

W1101 00:25:56.912035 21224 training.py:504] Falling back from v2 loop because of error: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, <class 'NoneType'>


Train on 800 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


W1101 00:25:57.352209 21224 training.py:504] Falling back from v2 loop because of error: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, <class 'NoneType'>


test_acc:  0.09


In [107]:
x_val = xtrain[:200]
partial_x_train = xtrain[200:]

y_val = ytrain[:200]
partial_y_train = ytrain[200:]

In [108]:
model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(xtrain.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=30,
          batch_size=512,
          validation_data=(x_val, y_val))
results = model.evaluate(xtest, ytest)

W1101 00:25:58.410072 21224 training.py:504] Falling back from v2 loop because of error: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, <class 'NoneType'>


Train on 600 samples, validate on 200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


W1101 00:25:59.053997 21224 training.py:504] Falling back from v2 loop because of error: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, <class 'NoneType'>




In [110]:
test_loss, test_acc = model.evaluate(xtest,ytest)

W1101 00:26:46.179276 21224 training.py:504] Falling back from v2 loop because of error: Failed to find data adapter that can handle input: <class 'pandas.core.frame.DataFrame'>, <class 'NoneType'>




In [111]:
print('test_acc: ',test_acc)

test_acc:  0.09
