In [8]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

In [11]:
# Render one spectrogram PNG per track (first 5 seconds of audio), stored
# under img_data/<genre>/.
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for genre in genres:
    # One output directory per genre; safe to re-run because existing
    # directories are accepted.
    genre_dir = pathlib.Path(f'img_data/{genre}')
    genre_dir.mkdir(parents=True, exist_ok=True)
    for track in os.listdir(f'./genres/{genre}'):
        track_path = f'./genres/{genre}/{track}'
        samples, sample_rate = librosa.load(track_path, mono=True, duration=5)
        plt.specgram(
            samples,
            NFFT=2048,
            Fs=2,
            Fc=0,
            noverlap=128,
            cmap=cmap,
            sides='default',
            mode='default',
            scale='dB',
        )
        plt.axis('off')
        # Drop the last three characters (the extension) and every remaining
        # dot, e.g. "disco.00043.wav" -> "disco00043".
        stem = track[:-3].replace(".", "")
        plt.savefig(f'img_data/{genre}/{stem}.png')
        # Clear the shared figure so the next track starts from a blank canvas.
        plt.clf()

<Figure size 720x720 with 0 Axes>

In [12]:
# Column names for data.csv: the track filename, six averaged spectral
# descriptors, twenty averaged MFCC columns (mfcc1..mfcc20), and the genre label.
header = (
    [
        'filename',
        'chroma_stft',
        'rmse',
        'spectral_centroid',
        'spectral_bandwidth',
        'rolloff',
        'zero_crossing_rate',
    ]
    + [f'mfcc{n}' for n in range(1, 21)]
    + ['label']
)

In [13]:
# Build data.csv: one row per track holding the filename, the mean of each
# spectral feature over the first 30 seconds of audio, the mean of every MFCC
# coefficient row, and the genre label.
#
# The file is opened once for the whole run (instead of being reopened in
# append mode for every track), and each row is assembled as a list of fields
# rather than a space-joined string that is split again, so a filename that
# contains whitespace can no longer shift the columns.
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
with open('data.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order of data.csv stable across platforms and runs.
        for filename in sorted(os.listdir(f'./genres/{g}')):
            songname = f'./genres/{g}/{filename}'
            y, sr = librosa.load(songname, mono=True, duration=30)
            rmse = librosa.feature.rms(y=y)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # Field order must match `header`. Values are formatted with the
            # same f-string conversion as before so the written text is unchanged.
            row = [
                filename,
                f'{np.mean(chroma_stft)}',
                f'{np.mean(rmse)}',
                f'{np.mean(spec_cent)}',
                f'{np.mean(spec_bw)}',
                f'{np.mean(rolloff)}',
                f'{np.mean(zcr)}',
            ]
            # One averaged value per MFCC coefficient row.
            # NOTE(review): `header` declares 20 mfcc columns; this relies on
            # librosa.feature.mfcc returning 20 rows by default — confirm.
            row.extend(f'{np.mean(e)}' for e in mfcc)
            row.append(g)
            writer.writerow(row)

In [66]:
from sklearn.utils import shuffle

# Load the extracted features and shuffle the rows. The shuffle is seeded so
# that the row order, and everything fitted on it further down (scaling,
# k-means), is reproducible after a kernel restart.
data = pd.read_csv('data.csv')
data = shuffle(data, random_state=0)

data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
343,disco.00043.wav,0.420332,0.173692,2874.926232,2759.729292,6019.138603,0.134784,-62.806194,77.884285,3.535205,...,0.984862,5.618052,0.688097,-0.131621,2.206273,-1.850217,-6.809112,-3.191936,-3.54532,disco
482,hiphop.00082.wav,0.503555,0.10234,2290.302465,2182.028104,4819.987521,0.113229,-122.766541,95.037415,-29.34173,...,15.642358,-6.696302,8.275928,-0.745034,9.416046,1.266228,1.673262,0.488482,1.613338,hiphop
568,jazz.00068.wav,0.285185,0.099549,1382.232294,1642.106473,2831.024548,0.061231,-204.397491,138.613831,-10.805427,...,-1.020453,-9.980793,-6.656221,-3.850046,-5.068009,-2.039894,-0.264972,-3.189402,-2.451722,jazz
499,hiphop.00099.wav,0.430902,0.153787,1962.886634,1916.814382,3798.243699,0.101571,-138.738937,100.225327,-31.469717,...,3.532051,-7.980703,0.803932,-2.977356,-0.726379,-7.721342,-1.218839,-6.524153,-5.398194,hiphop
919,rock.00019.wav,0.435911,0.112228,2321.185733,2118.985282,4702.063234,0.1282,-84.179573,102.337204,-41.332401,...,5.228018,-16.757763,1.092845,-9.666018,1.088416,-7.537924,3.236286,-4.712543,-0.318536,rock


In [74]:
# Genre names, kept aside as the ground-truth labels.
label = data['label']

# Every column except the first (filename) and the last (label) is a numeric
# feature; standardise those columns with StandardScaler.
x = data.iloc[:, 1:-1]
scaler = StandardScaler()
x = scaler.fit_transform(x.to_numpy(dtype=float))

In [75]:
from sklearn.preprocessing import LabelEncoder

# Turn the genre names into integer class ids and show which ids occur.
encoder = LabelEncoder()
encoder.fit(label)
y = encoder.transform(label)
print(np.unique(y))

[0 1 2 3 4 5 6 7 8 9]


In [76]:
from sklearn.cluster import KMeans
import numpy as np

# Cluster the scaled features into 10 groups with a fixed seed.
kmeans = KMeans(n_clusters=10, init='k-means++', random_state=0)
kmeans.fit(x)
# The per-row cluster assignments are then available as kmeans.labels_.


In [77]:
# Pair each row's encoded genre id (y) with its k-means cluster id (ny).
test = pd.DataFrame({'y': y, 'ny': kmeans.labels_})

# Count how many rows fall into every (genre id, cluster id) combination.
pair_sizes = test.groupby(by=['y', 'ny']).size()
groupedtest = pair_sizes.reset_index(name='count')

In [78]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between the encoded genre labels and the k-means
# cluster assignments.
print(adjusted_rand_score(test['y'], test['ny']))

0.17567683288131639


In [50]:
# groupedtest.head(20)

In [46]:
# import itertools 
# perm = itertools.permutations(np.unique(y)) 
# _totalperm=[]
  
# for i in list(perm): 
#     print(i,list(i))
#     _totalperm.append(list(i))

In [79]:
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics.cluster import normalized_mutual_info_score

In [80]:
# Two hand-written labelings of the same 18 items, written as runs of equal
# labels, used to sanity-check the clustering metrics.
oo = [1] * 5 + [3] * 6 + [6] * 5 + [2] * 2
oo2 = [2] * 6 + [6] * 5 + [3] * 2 + [1] * 5

print(adjusted_rand_score(oo, oo2))

0.6004273504273504


In [81]:
# Print each metric for the two toy labelings, one value per line, in the
# order: normalized mutual information, mean squared error, adjusted Rand index.
for metric in (normalized_mutual_info_score, mean_squared_error, adjusted_rand_score):
    print(metric(oo, oo2))

0.7449755017898662
8.11111111111111
0.6004273504273504
