In [2]:
import numpy as np
import os
import librosa
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from preprocess import *
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical

# Root directory that holds one sub-folder of audio clips per label.
file_path = "audio"

# Number of MFCC frames (time axis) each clip is zero-padded to.
max_pad_length = 40

In [25]:
def convert_wave_to_spectrogram(path="C:/Users/abdal_000/Downloads/train/audio/go/0d53e045_nohash_1.wav"):
    """Read a WAV file, print its spectrogram shape and plot the spectrogram.

    Args:
        path: Location of the WAV file to read with ``scipy.io.wavfile.read``.

    Returns:
        None. The spectrogram shape is printed and a figure is shown.
    """
    sample_rate, samples = wavfile.read(path)
    frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)
    print(spectrogram.shape)

    # Draw the spectrogram once, against the real time/frequency vectors.
    # The previous extra plt.imshow() call painted a second, pixel-indexed
    # image on the same axes, which made the Hz / seconds labels wrong.
    plt.pcolormesh(times, frequencies, spectrogram)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()

In [29]:
def convert_wave_to_mfcc(filePath):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    The file is loaded as mono at its native sampling rate (``sr=None``).
    The time axis (axis 1) of the MFCC matrix is cropped or zero-padded so
    that it always has exactly ``max_pad_length`` columns.

    Args:
        filePath: Path of the audio file to load with ``librosa.load``.

    Returns:
        A 2-D numpy array whose second dimension equals ``max_pad_length``.
    """
    wav, sr = librosa.load(filePath, mono=True, sr=None)

    # Pass the signal by keyword: recent librosa releases make ``y``
    # keyword-only, and older releases accept the keyword form as well.
    mfcc = librosa.feature.mfcc(y=wav, sr=sr)

    # Crop clips that are longer than the target length; otherwise the pad
    # width below would be negative and np.pad would raise ValueError.
    mfcc = mfcc[:, :max_pad_length]
    missing_frames = max_pad_length - mfcc.shape[1]

    # Zero-pad on the right of the time axis only.
    return np.pad(mfcc, pad_width=((0, 0), (0, missing_frames)), mode='constant')

In [30]:
def save_mfcc(path=file_path):
    """Compute MFCCs for every label folder and save one ``.npy`` file per label.

    Each sub-directory of ``path`` is treated as a label. Every file inside
    it is converted with ``convert_wave_to_mfcc`` and the results are saved
    together as ``<label>.npy`` in the current working directory.

    Args:
        path: Root directory containing one sub-directory per label.

    Returns:
        None. The output is written to disk with ``np.save``.
    """
    for label in os.listdir(path):
        label_dir = os.path.join(path, label)

        # Only sub-directories are labels; a stray file at the top level
        # would otherwise make the os.listdir() call below raise.
        if not os.path.isdir(label_dir):
            continue

        # Sort the file names so the row order of the saved array is stable
        # between runs (os.listdir returns entries in arbitrary order).
        mfcc_vectors = [
            convert_wave_to_mfcc(os.path.join(label_dir, aud))
            for aud in sorted(os.listdir(label_dir))
        ]
        np.save(label + '.npy', mfcc_vectors)

In [None]:
# Try both converters on one sample clip: the spectrogram helper falls back to
# its default path, and the MFCC matrix is shown as this cell's output.
convert_wave_to_spectrogram()
convert_wave_to_mfcc(filePath="C:/Users/abdal_000/Downloads/train/audio/go/0d53e045_nohash_1.wav")

In [None]:
def get_labels(path=file_path):
    """Return the label names found under ``path`` in alphabetical order.

    A label is any sub-directory of ``path``. Plain files are ignored so
    they are not mistaken for labels, and the result is sorted because
    ``os.listdir`` returns entries in arbitrary order, which would make
    any index derived from the list differ between runs or machines.

    Args:
        path: Root directory containing one sub-directory per label.

    Returns:
        A sorted list of sub-directory names.
    """
    return sorted(
        entry
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

In [None]:
def padding_data():
    """Unfinished placeholder: fetch the labels and create two empty lists.

    Nothing is computed or returned yet (the function implicitly returns
    ``None``).
    """
    labels = get_labels()
    x_data, y_data = [], []
    

In [None]:

def get_train_test(split_ratio=0.6, random_state=42):
    """Load the saved per-label feature files and split them into train/test sets.

    For every label returned by ``get_labels()`` the file ``<label>.npy`` is
    loaded from the current working directory. All arrays are stacked along
    the first axis, and each sample gets its label's position in the label
    list as its class id.

    Args:
        split_ratio: Fraction of samples kept for training; the test
            fraction is ``1 - split_ratio``.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The ``train_test_split`` result: ``X_train, X_test, y_train, y_test``.
    """
    # The get_labels() defined in this notebook returns a flat list of names
    # and defaults to file_path. Unpacking it into three values (or passing
    # DATA_PATH, which the visible cells never define) would either fail or,
    # with exactly three labels, silently bind `labels` to a single string.
    labels = get_labels()

    # Load everything first and concatenate once, instead of re-copying the
    # growing array on every loop iteration.
    per_label = [np.load(label + '.npy') for label in labels]
    X = np.concatenate(per_label, axis=0)

    # Class id = index of the label; kept as float to match the dtype the
    # previous np.zeros / np.append construction produced.
    y = np.concatenate([
        np.full(features.shape[0], fill_value=class_id, dtype=np.float64)
        for class_id, features in enumerate(per_label)
    ])

    assert X.shape[0] == len(y)

    return train_test_split(X, y, test_size=(1 - split_ratio), random_state=random_state, shuffle=True)

In [None]:

X_train, X_test, y_train, y_test = get_train_test()

# Add the trailing channel axis Conv2D expects. The spatial dimensions are
# taken from the data instead of a hard-coded (20, 11): features padded to
# max_pad_length = 40 frames cannot be reshaped to 11 columns.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
input_shape = X_train.shape[1:]

# Use one explicit class count for both splits and for the output layer, so
# the one-hot widths agree even if a class is missing from one split.
num_classes = int(max(y_train.max(), y_test.max())) + 1
y_train_hot = to_categorical(y_train, num_classes=num_classes)
y_test_hot = to_categorical(y_test, num_classes=num_classes)

# Small CNN: one conv + pooling stage, then a dense classifier head.
model = Sequential()
model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# NOTE: the held-out test split doubles as the validation set here.
model.fit(X_train, y_train_hot, batch_size=100, epochs=200, verbose=1, validation_data=(X_test, y_test_hot))