In [1]:
import librosa
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
import numpy as np
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
DATA_PATH="./data/"

def get_labels(path=DATA_PATH):
	labels=os.listdir(path)
	label_indices=np.arange(0,len(labels))
	return labels, label_indices, to_categorical(label_indices)

In [3]:
def speech2mfcc(file_path,max_len=11):
	wave, sr= librosa.load(file_path, mono=True, sr=None)
	wave=wave[::10]
	mfcc=librosa.feature.mfcc(wave,sr=16000)
#	print(mfcc)
	if (max_len > mfcc.shape[1]): #.shape[1], 1-->column
		pad_width=max_len-mfcc.shape[1]
		mfcc=np.pad(mfcc,pad_width=((0,0),(0,pad_width)),mode='constant')

	else:
		mfcc=mfcc[:,:max_len]
#	print(mfcc.shape)
	return mfcc

In [4]:
def save_data_to_array(path=DATA_PATH, max_len=11):
	labels,_,_=get_labels(path)

	for label in labels:
		mfcc_vectors=[]

		speechfiles=[path + label + '/' + speech for speech in os.listdir(path+'/'+label)]
		for speech in tqdm(speechfiles,"Saving vectors to label -'{}'".format(label)):
			mfcc=speech2mfcc(speech,max_len=max_len)
			print(mfcc.shape)
			mfcc_vectors.append(mfcc)
#		print(mfcc_vectors)
		np.save(label+'.npy',mfcc_vectors)

In [5]:
def get_train_test(split_ratio=0.8,random_state=42):
	labels,indices,_=get_labels(DATA_PATH)

	X=np.load(labels[0]+'.npy')
	y=np.zeros(X.shape[0])
	
	for i,label in enumerate(labels[1:]):
		x=np.load(label+'.npy')
		X=np.vstack((X,x))
		y=np.append(y,np.full(x.shape[0],fill_value=(i+1)))
	assert X.shape[0] == len(y)
#	print(X.shape)
#	print(y)
	return train_test_split(X,y,test_size=(1-split_ratio),random_state=random_state,shuffle=True)

In [6]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [7]:
X_train, X_test, y_train, y_test=get_train_test()
#print(X_test.shape)
X_train=X_train.reshape(X_train.shape[0],20,11,1)
X_test=X_test.reshape(X_test.shape[0],20,11,1)
y_train_hot=to_categorical(y_train)
y_test_hot=to_categorical(y_test)

In [17]:
model=Sequential()
model.add(Conv2D(32,kernel_size=(2,2),activation='relu',input_shape=(20,11,1)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10,activation='softmax'))

In [18]:
import matplotlib.pyplot as plt
import librosa.display

In [19]:
wave, sr= librosa.load(DATA_PATH+'8/8 (17).wav', mono=True, sr=None)
spectrogram=librosa.feature.melspectrogram(y=wave,sr=sr)

In [22]:
plt.figure(figsize=(10,4))
librosa.display.specshow(librosa.power_to_db(spectrogram,ref=np.max),
                        y_axis='mel',fmax=8000,x_axis='time(s)')
plt.colorbar(format='%+2.0f dB')
plt.title('Mel Spectrogram')
plt.tight_layout()
plt.savefig('spectogram_8.svg',format='svg', dpi=1200)

ParameterError: Unknown axis type: time(s)

<Figure size 720x288 with 0 Axes>