In [None]:
import os
import time
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import speechpy
import pywt
import scipy
import scipy.fftpack as fft
from scipy.io import wavfile
from scipy.signal import get_window
import codecs, json

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

Ekstraksi

In [None]:
def normalize_audio(audio):
    """Scale a signal so that its largest absolute sample becomes 1.0.

    Parameters
    ----------
    audio : array-like
        Input samples. Integer, unsigned and boolean inputs are promoted to
        float64 before the peak is measured.

    Returns
    -------
    numpy.ndarray
        A new floating-point array equal to ``audio / max(|audio|)``. When the
        peak is exactly zero the samples are returned unscaled (as a copy)
        instead of dividing by zero.
    """
    samples = np.asarray(audio)
    # Promote integer PCM first: np.abs on the most negative fixed-width
    # integer (e.g. int16 -32768) wraps around and yields a wrong peak.
    if samples.dtype.kind in 'iub':
        samples = samples.astype(np.float64)
    peak = np.max(np.abs(samples))
    if peak == 0:
        # Silent signal: nothing to scale, and dividing would produce NaNs.
        return samples.copy()
    return samples / peak

def frame_audio(audio, FFT_size = 2048, hop_size = 10, sample_rate=16000):
    """Cut a 1-D signal into overlapping frames of ``FFT_size`` samples.

    The signal is reflect-padded by ``FFT_size / 2`` samples on both sides
    before framing.

    Parameters
    ----------
    audio : array-like
        1-D input samples.
    FFT_size : int
        Number of samples per frame.
    hop_size : number
        Step between consecutive frames in milliseconds (it is converted to
        samples as ``sample_rate * hop_size / 1000``).
    sample_rate : number
        Sampling rate in Hz used for the millisecond-to-sample conversion.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(frame_num, FFT_size)`` with one frame per row.
    """
    audio = np.pad(audio, int(FFT_size/2), mode='reflect')
    frame_len = np.round(sample_rate * hop_size / 1000).astype(int)
    frame_num = int((len(audio) - FFT_size) / frame_len)+ 1
    frames = np.zeros((frame_num, FFT_size))
    for n in range(frame_num):
        frames[n] = audio[n*frame_len : n*frame_len+FFT_size]
    # Return only after every frame has been copied; returning from inside
    # the loop would leave all rows but the first filled with zeros.
    return frames

def freq_to_mel(freq):
    """Map a frequency value (scalar or array) onto the mel scale.

    Computes ``2595 * log10(1 + freq / 700)``.
    """
    ratio = freq / 700.0
    return 2595.0 * np.log10(1.0 + ratio)

def met_to_freq(mels):
    """Map a mel value (scalar or array) back to a frequency.

    Computes ``700 * (10 ** (mels / 2595) - 1)``, the inverse of the formula
    used by ``freq_to_mel``.
    """
    exponent = mels / 2595.0
    return 700.0 * (10.0 ** exponent - 1.0)

def get_filter_points(fmin, fmax, mel_filter_num, FFT_size, sample_rate=16000):
    """Place the edge/centre points of a mel filter bank.

    ``mel_filter_num + 2`` points are spaced evenly on the mel scale between
    ``fmin`` and ``fmax`` and converted back to frequencies.

    Returns
    -------
    tuple
        ``(bins, freqs)`` where ``freqs`` holds the point frequencies and
        ``bins`` is ``floor(FFT_size / sample_rate * freqs)`` cast to int.
    """
    mel_low, mel_high = freq_to_mel(fmin), freq_to_mel(fmax)
    mel_grid = np.linspace(mel_low, mel_high, num=mel_filter_num+2)
    freqs = met_to_freq(mel_grid)
    bins = np.floor((FFT_size) / sample_rate * freqs).astype(int)
    return bins, freqs

def get_filters(filter_points, FFT_size):
    """Build a bank of triangular filters from consecutive filter points.

    Parameters
    ----------
    filter_points : sequence of int
        Bin indices; filter ``n`` rises from ``filter_points[n]`` to
        ``filter_points[n + 1]`` and falls from there to
        ``filter_points[n + 2]``.
    FFT_size : int
        Each filter row has ``FFT_size / 2`` columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(filter_points) - 2, FFT_size / 2)``. With fewer
        than three filter points there are no filters to build.
    """
    filter_count = max(len(filter_points) - 2, 0)
    filters = np.zeros((filter_count, int((FFT_size/2))))
    for n in range(filter_count):
        left, centre, right = filter_points[n], filter_points[n + 1], filter_points[n + 2]
        # Rising edge of triangle n.
        filters[n, left : centre] = np.linspace(0, 1, centre - left)
        # Falling edge of triangle n. This must run for every filter, not
        # only once after the loop, otherwise only the last row gets one.
        filters[n, centre : right] = np.linspace(1, 0, right - centre)
    return filters

def replaceZeroes(data):
    """Overwrite every zero in ``data`` with its smallest non-zero value.

    The array is modified in place and the same object is returned.
    """
    zero_mask = data == 0
    smallest_nonzero = data[~zero_mask].min()
    data[zero_mask] = smallest_nonzero
    return data

def dct(dct_filter_num, filter_len):
    """Return a ``(dct_filter_num, filter_len)`` matrix of cosine basis rows.

    Row 0 is the constant ``1 / sqrt(filter_len)``; row ``i`` (``i >= 1``) is
    ``cos(i * (2k + 1) * pi / (2 * filter_len)) * sqrt(2 / filter_len)`` for
    ``k = 0 .. filter_len - 1``.
    """
    basis = np.empty((dct_filter_num,filter_len))
    basis[0, :] = 1.0 / np.sqrt(filter_len)
    # Odd multiples of pi / (2 * filter_len): the sample angles of the basis.
    angles = np.arange(1, 2 * filter_len, 2) * np.pi / (2.0 * filter_len)
    for order, row in enumerate(basis[1:], start=1):
        row[:] = np.cos(order * angles) * np.sqrt(2.0 / filter_len)
    return basis

In [None]:
# Uninitialised 40x1 buffer; a later cell fills it with one mean value per
# cepstral-coefficient row.
fiturmean = np.empty((40, 1))
x = 0  # NOTE(review): not referenced by any cell visible here — confirm it is still needed

# Read the WAV file; `audio` may be 1-D or 2-D (the next cell checks its shape).
sample_rate, audio = wavfile.read('./dummy-sound/1010_IOM_ANG_XX.wav')
# Timestamp taken right after loading.
# NOTE(review): not referenced by any cell visible here.
waktuSekarang = time.time()
print("\t - Membaca audio...\t\t\t\t(done)")

In [None]:
# If the loaded array has more than one dimension, keep only column 0
# (presumably the first channel — confirm); then peak-normalise it.
if (len(audio.shape) > 1):
    audio1 = normalize_audio(audio[:,0])
else:
    audio1 = normalize_audio(audio)

audiohasil2 = audio1  # plain alias of the normalised signal, no copy is made
hop_size = 12    # frame step in milliseconds (frame_audio converts it to samples)
FFT_size = 2048  # samples per frame
audio_framed = frame_audio(audiohasil2, FFT_size=FFT_size, hop_size=hop_size, sample_rate=sample_rate)
print("\t - Audio Framing...\t\t\t\t(done)")

In [None]:
# Build a Hamming window of FFT_size samples and multiply it into the framed
# audio (broadcast against audio_framed).
window = get_window("hamming", FFT_size, fftbins=True)
audio_win = audio_framed * window
print("\t - Windowing...\t\t\t\t\t(done)")

In [None]:
# Two-level 'bior6.8' wavelet decomposition of the transposed, windowed frames,
# immediately followed by reconstruction from the unmodified coefficients.
# NOTE(review): no axis is passed, so pywt's default axis applies to the
# transposed array — confirm this is the intended axis.
audio_winT = np.transpose(audio_win)
coeffs = pywt.wavedec(audio_winT, 'bior6.8', mode='sym', level=2);  # DWT
# NOTE(review): pywt.wavedec documents its result as [cA_n, cD_n, ..., cD_1], so
# `cD1` here would hold the level-2 detail and `cD2` the level-1 detail — confirm.
# None of the three names is used by the cells visible here.
cA, cD1, cD2 = coeffs
audio_wavelet = pywt.waverec(coeffs, 'bior6.8', mode='sym')
# Transpose back to the layout audio_win had before the decomposition.
audio_wavelet = np.transpose(audio_wavelet)
print("\t - Wavelet Transform...\t\t\t\t(done)")

In [None]:
# Element-wise squared magnitude of the wavelet-reconstructed frames.
audio_power = np.square(np.abs(audio_wavelet))
print("\t - Menghitung Audio Power...\t\t\t(done)")

In [None]:
# Mel filter-bank settings, consumed by the filter-point cell below.
freq_min = 0                  # lower frequency bound of the filter bank
freq_high = sample_rate / 2   # upper bound: half of the sampling rate
mel_filter_num = 10           # number of mel filters

In [None]:
# Filter points and triangular filters are built with the literal 4096, not the
# FFT_size variable (2048) set earlier; get_filters sizes each row as
# FFT_size / 2, i.e. 2048 columns here.
# NOTE(review): presumably chosen so the rows match the frame width — confirm.
filter_points, mel_freqs = get_filter_points(freq_min, freq_high, mel_filter_num, 4096, sample_rate)
filters = get_filters(filter_points, 4096)
# Scale each filter by 2 / (width between its outer edge frequencies).
enorm = 2.0 / (mel_freqs[2:mel_filter_num+2] - mel_freqs[:mel_filter_num])
filters *= enorm[:, np.newaxis]
print("\t - Menghitung Filter Point...\t\t\t(done)")
            
# Apply the filter bank to the power frames.
audio_filtered = np.dot(filters, np.transpose(audio_power))
# replaceZeroes modifies audio_filtered in place, so the log below never sees a
# zero; `prob` is the same array object and is not used again in the cells
# visible here.
prob = replaceZeroes(audio_filtered)
audio_log = 10.0 * np.log10(audio_filtered)
print("\t - Melakukan Filterisasi Sinyal...\t\t(done)")
            
# Project the log filter-bank output onto 40 cosine basis rows built over
# mel_filter_num (10) points.
dct_filter_num = 40
dct_filters = dct(dct_filter_num, mel_filter_num)
cepstral_coefficents = np.dot(dct_filters, audio_log)
print("\t - Generate Nilai Cepstral Coefficient...\t(done)")

In [None]:
# Normalise the coefficients with speechpy's cmvn.
# NOTE(review): the second positional argument (True) presumably enables
# variance normalisation, and cmvn's expected array orientation should be
# checked against speechpy's documentation — confirm both.
# This cell overwrites its own input, so re-running it normalises again.
cepstral_coefficents = speechpy.processing.cmvn(cepstral_coefficents,True)

# For every row, accumulate the values sequentially and divide by the number of
# columns, storing the row mean in fiturmean (which has 40 rows).
for xpos in range(len(cepstral_coefficents)):
    sigmax = 0
    for xn in cepstral_coefficents[xpos,:]:
        sigmax += xn
    # len() of the transpose is the column count of cepstral_coefficents
    fiturmean[xpos,0] = sigmax/len(np.transpose(cepstral_coefficents))

In [None]:
# Column labels "fitur1" .. "fitur40", one per feature.
indextable = []
for a in range(40):
    indextable.append("fitur" + str(a+1))

# Write the features as a single-row sheet (fiturmean transposed to 1x40),
# without the DataFrame index column.
df = pd.DataFrame(np.transpose(fiturmean),columns=indextable)
df.to_excel("WFCCTest.xlsx", index=False)

KNN

In [None]:
def klasifikasiKNN():
    """Classify the rows of 'WFCCTest.xlsx' with a KNN fitted on 'WFCCTrain.xlsx'.

    Both workbooks are read without a header and their first row is skipped
    when slicing. Columns 0..39 are used as features and column 40 of the
    training sheet as the label. The classifier uses 5 neighbours,
    distance weighting and the euclidean metric.

    Returns
    -------
    The array of predicted labels for the test rows; it is also printed.
    """
    train_sheet = pd.read_excel('WFCCTrain.xlsx', header=None)  # training data
    test_sheet = pd.read_excel('WFCCTest.xlsx', header=None)    # raw test data

    # Row 0 is dropped from both sheets; the first 40 columns are the features.
    train_features = train_sheet.iloc[1:, :40].values
    train_labels = train_sheet.iloc[1:, 40].values
    test_features = test_sheet.iloc[1:, :40].values

    model = KNeighborsClassifier(n_neighbors=5, weights="distance", metric="euclidean")
    model.fit(train_features, train_labels)
    predicted = model.predict(test_features)

    print("Prediksi :", predicted)
    return predicted

# Pause for 0.1 s, then run the classifier.
# NOTE(review): presumably the pause gives the Excel file written by an earlier
# cell time to be flushed — confirm whether it is needed.
time.sleep(0.1)
klasifikasiKNN()