In [1]:
import os 
import sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


import librosa
import IPython.display as ipd
import noisereduce as nr
import malaya_speech 
import tensorflow as tf
import pickle 

# Root directory of the Speech Commands v0.01 data. The trailing slash matters:
# every other path in this notebook is built by plain string concatenation.
PATH = '../data/raw/speech_commands_v0.01/'
# Text files listing the recordings reserved for testing and validation.
TEST_PATH = f'{PATH}testing_list.txt'
VAL_PATH = f'{PATH}validation_list.txt'



In [12]:
class Oratio:
    """Spoken-command classifier wrapper.

    Loads a trained Keras model and its pickled label encoder, and exposes the
    preprocessing -> MFCC -> prediction pipeline for a single audio file.
    """

    # Sample rate every recording is loaded and processed at.
    SAMPLE_RATE = 16000
    # Fixed number of samples handed to the feature extractor.
    TARGET_LENGTH = SAMPLE_RATE + 4000
    # Frame size passed to malaya_speech.generator.frames
    # (presumably milliseconds -- confirm against the malaya_speech docs).
    FRAME_SIZE = 30
    # Number of MFCC coefficients requested from librosa.
    N_MFCC = 13

    def __init__(self):
        """Load the model and label encoder from ``../models/``.

        Raises:
            EOFError: if the pickled label encoder file is empty or truncated.
        """
        self.model = tf.keras.models.load_model('../models/oratio_model.h5')
        encoder_path = "../models/le.obj"
        # NOTE(security): pickle.load can execute arbitrary code. Only load an
        # encoder file that was produced by a trusted training run.
        try:
            with open(encoder_path, 'rb') as f:
                self.le = pickle.load(f)
        except EOFError as exc:
            # An empty file otherwise surfaces as a bare "Ran out of input".
            raise EOFError(
                f"{encoder_path} is empty or truncated; re-export the label encoder"
            ) from exc

    @staticmethod
    def _fit_length(signal, length):
        """Return ``signal`` trimmed or zero-padded to exactly ``length`` samples."""
        trimmed = np.asarray(signal)[:length]
        # After trimming, the pad size can never be negative.
        padding = np.zeros(length - trimmed.shape[0])
        return np.concatenate((trimmed, padding))

    def prepare_data(self, filename):
        """Load one audio file, denoise it, drop non-speech frames and fix its length.

        Args:
            filename: path of the audio file to load.

        Returns:
            1-D numpy array of exactly ``TARGET_LENGTH`` samples.
        """
        sr = self.SAMPLE_RATE
        vad = malaya_speech.vad.webrtc()
        samples, _ = librosa.load(filename, sr=sr)
        # The noise reducer must be told the real sample rate (16000, not 1600).
        samples = nr.reduce_noise(y=samples, sr=sr, stationary=True)
        y_ = malaya_speech.resample(samples, sr, 16000)
        # The VAD runs on the integer-converted copy; the float frames are what
        # gets stitched back together below.
        y_ = malaya_speech.astype.float_to_int(y_)
        frames = malaya_speech.generator.frames(samples, self.FRAME_SIZE, sr)
        frames_ = list(malaya_speech.generator.frames(
            y_, self.FRAME_SIZE, 16000, append_ending_trail=False))
        frames_webrtc = [(frames[no], vad(frame)) for no, frame in enumerate(frames_)]
        voiced = malaya_speech.combine.without_silent(frames_webrtc)

        # Recordings longer than TARGET_LENGTH are trimmed instead of crashing
        # np.zeros with a negative size.
        return self._fit_length(voiced, self.TARGET_LENGTH)

    def extract_mfcc(self, array):
        """Compute MFCCs for one signal and flatten them into a single-row batch.

        Args:
            array: 1-D audio signal, as returned by ``prepare_data``.

        Returns:
            numpy array of shape ``(1, n_features)`` ready for ``model.predict``.
        """
        mfcc_feat = librosa.feature.mfcc(
            y=array, sr=self.SAMPLE_RATE, n_mfcc=self.N_MFCC)
        # Previously this method returned None, so predict() could never work.
        return np.array([mfcc_feat.flatten()])

    def predict(self, filename):
        """Return the decoded label predicted for the audio file ``filename``."""
        signal = self.prepare_data(filename=filename)
        mfcc_input = self.extract_mfcc(array=signal)
        output = self.model.predict(mfcc_input)
        # Highest-scoring class index, mapped back to its label.
        index = np.argmax(output[0])
        prediction = self.le.inverse_transform([index])[0]

        return prediction


In [13]:
# Instantiate the wrapper; its __init__ loads the Keras model and the pickled
# label encoder from ../models/.
model = Oratio()

EOFError: Ran out of input

In [None]:
# Run the preprocessing pipeline on a single uploaded recording.
signal = model.prepare_data(filename='../data/upload/0e5193e6_nohash_1.wav')

In [None]:
# Feed the prepared signal to the wrapper's MFCC extraction step.
mfccs = model.extract_mfcc(array=signal)

In [None]:
# Scratch check: wrap the extraction result in an ndarray.
data = np.array(mfccs)
# Disabled experiment: flatten everything after the first axis.
# data = data.reshape(np.array(mfccs).shape[0], -1)

In [None]:
# Scratch check: shape of the flattened features wrapped as a one-row batch.
np.array([mfccs.flatten()]).shape

In [None]:
def get_commands():
    """List the entries of the dataset root, minus known non-command entries.

    Returns:
        list of entry names found under ``PATH`` that are not in the
        exclusion list below.
    """
    non_commands = (
        '.DS_Store', 'validation_list.txt', 'LICENSE',
        '_background_noise_', 'README.md', 'testing_list.txt',
    )
    return [
        entry
        for entry in get_directory_content(path=PATH)
        if entry not in non_commands
    ]

def get_directory_content(path):
    """Return the names of the entries in ``path`` in sorted order.

    ``os.listdir`` returns entries in arbitrary order, so the result is sorted
    to make everything derived from it the same on every run.

    Args:
        path: directory to list.

    Returns:
        Sorted list of entry names (not full paths).
    """
    return sorted(os.listdir(path))

def open_file(filename):
    """Read a text file and return its lines without line terminators.

    Args:
        filename: path of the text file to read.

    Returns:
        list of str, one item per line.
    """
    # The context manager closes the handle, which the bare open() never did.
    with open(filename) as f:
        return f.read().splitlines()

def compile_dataset():
    """Collect every recording as a ``'<command>/<file>'`` relative path.

    Returns:
        list of str covering all recordings of all commands, grouped by command
        in the order ``get_commands`` returns them.
    """
    return [
        command + '/' + recording
        for command in get_commands()
        for recording in get_directory_content(path=PATH+command)
    ]

In [None]:
# Relative '<command>/<file>' paths of every recording under PATH.
files = compile_dataset()

In [None]:
# Load the first recording and plot its raw waveform against time in seconds.
samples, sample_rate = librosa.load(PATH+files[0], sr=16000)
fig = plt.figure(figsize=(14,8))
ax1 = fig.add_subplot(211)
ax1.set_title(f"Raw wave of {files[0]}")
ax1.set_xlabel('time')
ax1.set_ylabel('Amplitude')
# One x value per sample (sample index / sample rate). The previous axis had
# sample_rate points, which fails for any clip that is not exactly one second.
ax1.plot(np.arange(len(samples)) / sample_rate, samples)
plt.show()

In [None]:
# Sample rate used by the exploratory cells; `fs` and `sr` are notebook-level
# names that later cells read.
fs = 16000
sr = fs
# Audio player for the raw recording.
ipd.Audio(samples, rate=fs)

In [None]:
# Time stamp of each sample in seconds. `len(samples-1)` subtracted 1 from
# every sample rather than from the length, and the linspace spacing was not
# exactly 1/fs.
time = np.arange(len(samples)) / fs
# Stationary noise reduction on the raw recording.
no_noise = nr.reduce_noise(y=samples, sr=fs, stationary=True)
plt.figure(figsize=(14,8))
plt.plot(time, no_noise)
plt.show()

In [None]:
# Audio player for the denoised recording.
ipd.Audio(no_noise, rate=sample_rate)

In [None]:
# Voice-activity detection on the denoised signal, then removal of silent frames.
vad = malaya_speech.vad.webrtc()
y = no_noise
# Resample from `sr` to 16000 (sr is set to 16000 in an earlier cell, so this
# is presumably a no-op -- confirm).
y_ = malaya_speech.resample(y, sr, 16000)
# Integer-converted copy of the signal; this is what the VAD is called on.
y_ = malaya_speech.astype.float_to_int(y_)
# Frame the float signal and the integer signal with the same frame size of 30
# (presumably milliseconds -- confirm against the malaya_speech docs).
frames = malaya_speech.generator.frames(y, 30, sr)
frames_ = list(malaya_speech.generator.frames(y_, 30, 16000, append_ending_trail=False))
# Pair each float frame with the VAD result computed on the integer frame at
# the same index.
frames_webrtc = [(frames[no], vad(frame)) for no, frame in enumerate(frames_)]
# Combine the frames back into one signal; by its name this drops the frames
# the VAD flagged as silent.
y_ = malaya_speech.combine.without_silent(frames_webrtc)

In [None]:
# Audio player for the signal after silence removal.
ipd.Audio(y_, rate=sr)

In [None]:
# Zero-pad the voiced audio up to one second (sr samples). max() keeps the pad
# size non-negative if the voiced audio is already longer than that.
zero = np.zeros(max(1*sr - y_.shape[0], 0))
signal = np.concatenate((y_, zero))
# Time stamp of each sample in seconds. `len(signal-1)` subtracted 1 from every
# sample rather than from the length.
time = np.arange(len(signal)) / fs

In [None]:
# Plot the padded signal against its time axis.
plt.plot(time, signal)
plt.show()

In [None]:
# Recordings listed in the testing and validation list files.
test_files = open_file(TEST_PATH)
validation_files = open_file(VAL_PATH)
# Everything not held out is training data. The result is sorted because set
# iteration order for strings changes between kernel restarts, which would
# reshuffle the rows of the exported training set on every run.
train_files = sorted(set(files).difference(test_files, validation_files))

In [None]:
def prepare_dataset(filenames):
    """Preprocess a list of recordings into fixed-length signals with labels.

    Each file is loaded at 16 kHz, denoised, stripped of the frames the VAD
    marks as silent, then trimmed or zero-padded to ``sr + 4000`` samples.

    Args:
        filenames: ``'<command>/<file>'`` paths relative to ``PATH``.

    Returns:
        ``(data, labels)``: a list of 1-D numpy arrays and a list of the
        matching command names (the part of each path before the first '/').
    """
    sr = 16000
    target_length = sr + 4000

    # Nothing to process: skip building the VAD.
    if len(filenames) == 0:
        return [], []

    vad = malaya_speech.vad.webrtc()
    data = [None] * len(filenames)
    labels = [None] * len(filenames)
    for i, file in enumerate(filenames):
        samples, _ = librosa.load(PATH+file, sr=sr)
        # Use the local sample rate. This used to read the global `fs` from
        # another cell, which is undefined on a fresh kernel.
        samples = nr.reduce_noise(y=samples, sr=sr, stationary=True)
        y_ = malaya_speech.resample(samples, sr, 16000)
        # The VAD runs on the integer-converted copy; the float frames are
        # what gets stitched back together.
        y_ = malaya_speech.astype.float_to_int(y_)
        frames = malaya_speech.generator.frames(samples, 30, sr)
        frames_ = list(malaya_speech.generator.frames(y_, 30, 16000, append_ending_trail=False))
        frames_webrtc = [(frames[no], vad(frame)) for no, frame in enumerate(frames_)]
        voiced = malaya_speech.combine.without_silent(frames_webrtc)

        # Trim before padding so np.zeros never receives a negative size.
        voiced = voiced[:target_length]
        padding = np.zeros(target_length - voiced.shape[0])

        data[i] = np.concatenate((voiced, padding))
        labels[i] = file.split('/')[0]

    return data, labels

In [None]:
%%time
# Preprocess every test recording. The earlier `[0:1]` slice was a debugging
# shortcut that left the exported test set with a single row.
test_data, test_labels = prepare_dataset(filenames=test_files)

In [None]:
%%time
# Preprocess every validation recording (one load, denoise and VAD pass per file).
val_data, val_labels = prepare_dataset(filenames=validation_files)

In [None]:
%%time
# Preprocess every training recording (one load, denoise and VAD pass per file).
train_data, train_labels = prepare_dataset(filenames=train_files)

In [None]:
def extract_mfcc(data):
    """Compute an MFCC matrix for every signal in ``data``.

    Args:
        data: sequence of 1-D audio signals.

    Returns:
        list with one ``librosa.feature.mfcc`` result (13 coefficients,
        16 kHz sample rate) per input signal, in input order.
    """
    sample_rate = 16000
    n_coefficients = 13
    return [
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_coefficients)
        for signal in data
    ]

In [None]:
%%time
# MFCC matrices for the test signals.
test_mfcc = extract_mfcc(data=test_data)

In [None]:
%%time
# MFCC matrices for the validation signals.
val_mfcc = extract_mfcc(data=val_data)

In [None]:
%%time
# MFCC matrices for the training signals.
train_mfcc = extract_mfcc(data=train_data)

In [None]:
def reshape_array(mfccs):
    """Stack per-recording feature matrices and flatten each into one row.

    Args:
        mfccs: sequence of equally shaped feature arrays.

    Returns:
        2-D numpy array with one row per item of ``mfccs``.
    """
    stacked = np.array(mfccs)
    n_items = stacked.shape[0]
    # Keep the first axis and collapse everything else into the second.
    return stacked.reshape(n_items, -1)

In [None]:
# Flatten each split's MFCC matrices into one feature row per recording.
# All three splits are built here because the following cells use `train`
# and `val` as well as `test`.
train = reshape_array(mfccs=train_mfcc)
val = reshape_array(mfccs=val_mfcc)
test = reshape_array(mfccs=test_mfcc)

In [None]:
# Wrap each split in a DataFrame. Note that this rebinds the same names to
# DataFrame objects.
train = pd.DataFrame(train)
val = pd.DataFrame(val)
test = pd.DataFrame(test)

In [None]:
# Attach the command labels as a column; the row order is expected to match
# the order of train_labels -- verify both come from the same file list.
train['labels'] = train_labels

In [None]:
# Attach the command labels to the validation and test frames.
val['labels'] = val_labels
test['labels'] = test_labels

In [None]:
# Write each split to a CSV file in the current working directory, without the
# DataFrame index column.
train.to_csv('train.csv', index=False)
val.to_csv('val.csv', index=False)
test.to_csv('test.csv', index=False)