In [1]:
import librosa
import IPython.display as ipyd
import numpy as np
import pandas as pd
import random 
import matplotlib.pyplot as plt
import csv
import math
from sklearn.preprocessing import MinMaxScaler

In [2]:
def trim_silence(y):
    """Strip leading and trailing low-amplitude samples from a signal.

    A sample counts as "loud" when its absolute value is at least the mean
    absolute amplitude of the whole signal. The returned slice runs from the
    first loud sample to the last loud sample, both inclusive.

    Parameters
    ----------
    y : array-like
        One-dimensional sequence of samples.

    Returns
    -------
    numpy.ndarray
        The trimmed view of ``y``. An empty input is returned unchanged.
    """
    y = np.asarray(y)
    if y.size == 0:
        return y

    # Compare magnitudes: a strongly negative sample is loud, not silent.
    magnitude = np.abs(y)
    threshold = magnitude.mean()
    loud = np.flatnonzero(magnitude >= threshold)
    if loud.size == 0:
        # Only reachable when the threshold is NaN (e.g. NaN samples).
        return y[:0]

    # +1 so the last loud sample itself is kept in the slice.
    return y[loud[0]:loud[-1] + 1]

In [3]:
def extract_mfcc(y):
    """Return 13 MFCC values for the signal ``y`` as a flat 1-D array.

    The FFT window and the hop length are both set to the full signal
    length with centering disabled, so the whole signal is analysed as one
    window instead of being split into short frames.
    """
    window = y.size
    coefficients = librosa.feature.mfcc(
        y=y,
        n_mfcc=13,
        n_fft=window,
        hop_length=window,
        center=False,
    )
    # Flatten the 2-D result into a single vector.
    return coefficients.reshape(coefficients.size)

In [4]:
def extract_fcentroid(y, sr):
    """Return librosa's spectral centroid of ``y`` sampled at rate ``sr``.

    The FFT window and hop length both equal the signal length and centering
    is off, so the whole signal is treated as one analysis window. The raw
    array returned by librosa is passed back unchanged.
    """
    window = y.size
    return librosa.feature.spectral_centroid(
        y=y,
        sr=sr,
        n_fft=window,
        hop_length=window,
        center=False,
    )

In [5]:
def extract_zcr(y):
    """Compute the zero-crossing rate of a 1-D signal.

    Each sample is mapped to its sign (+1, -1, or 0 for anything that is
    neither positive nor negative). The absolute differences between
    consecutive signs are summed and divided by ``2 * len(y)``, so a direct
    +/- flip contributes one full crossing and a step to or from zero
    contributes half.

    Parameters
    ----------
    y : array-like
        One-dimensional sequence of samples.

    Returns
    -------
    float
        The zero-crossing rate; ``0.0`` for an empty signal.
    """
    y = np.asarray(y)
    n_samples = y.size
    if n_samples == 0:
        # Avoid dividing by zero; an empty signal has no crossings.
        return 0.0

    # Vectorized sign: integer arithmetic keeps the sum exact, and NaN maps
    # to 0 because both comparisons are False for it.
    signs = (y > 0).astype(np.int64) - (y < 0).astype(np.int64)
    sign_changes = int(np.abs(np.diff(signs)).sum())
    return sign_changes / (2 * n_samples)
            

In [6]:
# Class labels and per-class recording suffixes; every (label, suffix) pair
# names one file './data/<label><suffix>.wav'.
words = ['Mot', 'Hai', 'Ba', 'Bon', 'Nam', 'Sau', 'Bay', 'Tam', 'Chin', 'Muoi']
nums = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']

# One list per output column, filled row by row in the loop below.
mfcc_columns = ['mfcc' + str(k + 1) for k in range(13)]
d = {column: [] for column in mfcc_columns + ['fcentroid', 'zcr', 'class']}

for word in words:
    for num in nums:
        file_path = f'./data/{word}{num}.wav'
        x, sr = librosa.load(file_path)
        mfcc = extract_mfcc(x)
        for k, column in enumerate(mfcc_columns):
            d[column].append(mfcc[k])
        # The centroid comes back as a 2-D array; take its first entry.
        d['fcentroid'].append(extract_fcentroid(x, sr)[0, 0])
        d['zcr'].append(extract_zcr(x))
        d['class'].append(word)

features = pd.DataFrame(data=d)
features

Unnamed: 0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,fcentroid,zcr,class
0,22.677505,111.085785,-29.509369,17.556913,-45.030788,-21.841953,-3.334699,3.337915,5.085553,-0.056523,-20.480625,2.547899,6.018095,1385.091898,0.058065,Mot
1,-1.920408,130.058929,1.072041,1.828959,-29.393156,-25.927925,1.333704,-18.016750,6.639017,8.173170,-21.445820,-6.449494,-6.240080,1185.867295,0.046849,Mot
2,6.065578,111.849113,-28.093636,1.486623,-39.251434,-31.295361,-32.749294,-12.899841,-2.882684,9.564178,-7.706300,-1.332040,-5.319370,1263.344532,0.053409,Mot
3,69.856758,137.536789,-7.713745,-7.762856,-11.752549,-3.257017,-5.435544,-2.942955,-12.944557,7.465415,-14.273745,0.727341,-8.848277,1330.443458,0.038407,Mot
4,72.919968,149.959320,-17.861319,10.852497,-25.633686,-0.379863,-0.830895,21.137590,-5.732648,1.772147,-1.100827,4.088042,14.807987,1214.451678,0.043423,Mot
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,54.717751,116.601410,-85.521538,32.306526,-26.544178,-3.152604,16.897123,-58.483398,14.748135,8.873260,-1.893189,39.267464,-22.018459,2123.112737,0.079743,Muoi
96,97.444389,138.426880,-23.565144,-3.135663,8.022607,-6.569211,-8.307963,-11.208367,-4.159029,-3.821149,-4.116077,7.662721,-1.468883,1352.704683,0.043764,Muoi
97,202.638016,118.342255,-30.786961,9.574646,-13.091532,-1.147115,-32.434258,-26.613708,-19.617712,7.318149,-0.498770,-4.794875,1.954829,1802.353363,0.053834,Muoi
98,101.902885,144.483749,4.874279,-25.054792,6.312397,-19.476315,-20.282352,-8.271238,-57.648407,14.307973,-25.106329,15.522196,-0.083859,1578.842237,0.052146,Muoi


In [7]:
# Persist the unscaled feature table; index=False leaves the row index out of the file.
features.to_csv('features1.csv', index=False)

In [15]:
# Reload the unscaled feature table from disk.
df = pd.read_csv('features1.csv')

# Every column except the label is an input to the scaler.
X = df.drop(columns='class')

# Min-max normalise each feature column into the range [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X)

# Rebuild a frame with the original column names and re-attach the labels
# as the last column.
df_scaled = pd.DataFrame(X_scaled, columns=X.columns).assign(**{'class': df['class']})

# Write the scaled table next to the unscaled one.
df_scaled.to_csv('features2.csv', index=False)


In [29]:
def extract_feature(audio_file, stats_file='features1.csv'):
    """Extract a 15-value feature vector from an audio file, scaled to [-1, 1].

    The vector holds 13 MFCC values followed by the spectral centroid and
    the zero-crossing rate. Each value is min-max normalised using the
    minimum and maximum of the matching column in ``stats_file``:
    ``(value - min) / (max - min) * 2 - 1``.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to load with librosa.
    stats_file : str, optional
        CSV with columns ``mfcc1``..``mfcc13``, ``fcentroid`` and ``zcr``
        holding the unscaled reference features. Defaults to
        ``'features1.csv'``, the unscaled table this notebook writes and
        fits its scaler on, so query vectors use the same min/max as the
        stored scaled features.

    Returns
    -------
    list
        The 15 normalised feature values, in column order.
    """
    x, sr = librosa.load(audio_file)

    # Compute every raw feature exactly once (the ZCR pass is not cheap).
    mfcc = extract_mfcc(x)
    raw_values = [mfcc[k] for k in range(13)]
    raw_values.append(extract_fcentroid(x, sr)[0, 0])
    raw_values.append(extract_zcr(x))

    stats = pd.read_csv(stats_file)
    columns = ['mfcc' + str(k + 1) for k in range(13)] + ['fcentroid', 'zcr']

    feature = []
    for value, column in zip(raw_values, columns):
        low = stats[column].min()
        high = stats[column].max()
        feature.append((value - low) / (high - low) * 2 - 1)
    return feature

In [30]:
def distance(a,b):
    """Return the Euclidean distance between the first 15 entries of a and b.

    Entries past index 14 are ignored; inputs shorter than 15 raise
    IndexError when the missing position is read.
    """
    squared_total = sum((a[k] - b[k]) ** 2 for k in range(15))
    return math.sqrt(squared_total)

In [31]:
def get_feature(df,stt):
    """Collect the 15 feature values stored at row label ``stt`` of ``df``.

    Values are read in the order mfcc1..mfcc13, fcentroid, zcr and returned
    as a plain list.
    """
    columns = ['mfcc' + str(k + 1) for k in range(13)]
    columns += ['fcentroid', 'zcr']
    return [df[name][stt] for name in columns]

In [42]:
def extract_neighborclass(df,feature):
    """Pick the 5 classes whose randomly sampled row lies closest to ``feature``.

    For each of 10 blocks of 10 consecutive rows (rows ``i*10`` to
    ``i*10 + 9``), one row is drawn at random and its distance to
    ``feature`` is measured. The classes of the 5 nearest sampled rows are
    returned as integer codes, nearest first.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table with columns mfcc1..mfcc13, fcentroid, zcr and class.
        Assumes 100 rows grouped as 10 consecutive rows per class; verify
        against the CSV being loaded.
    feature : sequence
        The 15-value query vector.

    Returns
    -------
    list of int
        Class codes (Mot=0 ... Muoi=9) of the 5 nearest sampled rows.

    Notes
    -----
    Uses the global ``random`` state, so results vary between calls unless
    the caller seeds ``random`` first.
    """
    # Named class_codes so the builtin ``dict`` is not shadowed.
    class_codes = {'Mot':0,'Hai':1,'Ba':2,'Bon':3,'Nam':4,'Sau':5,'Bay':6,'Tam':7,'Chin':8,'Muoi':9}

    sampled_rows = []
    distances = []
    for block in range(10):
        stt = block*10 + random.randint(0,9)
        sampled_rows.append(stt)
        # get_feature is the row reader defined in this notebook; the name
        # previously called here was not defined anywhere.
        distances.append(distance(feature, get_feature(df, stt)))

    smallest_indices = np.argsort(distances)[:5]
    print(smallest_indices)

    return [class_codes[df['class'][sampled_rows[i]]] for i in smallest_indices]

In [49]:
def recognition(df,feature):
    """Classify ``feature`` by nearest neighbour within the candidate classes.

    The candidate class codes come from ``extract_neighborclass``. Every row
    of each candidate class (rows ``code*10`` to ``code*10 + 9``) is compared
    with ``feature``, and the class label of the single closest row is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table with columns mfcc1..mfcc13, fcentroid, zcr and class.
        Assumes 10 consecutive rows per class; verify against the CSV.
    feature : sequence
        The 15-value query vector.

    Returns
    -------
    list
        A one-element list holding the predicted class label.
    """
    candidate_rows = []
    distances = []
    for class_code in extract_neighborclass(df, feature):
        for offset in range(10):
            row = class_code*10 + offset
            # get_feature is the row reader defined in this notebook; the
            # name previously called here was not defined anywhere.
            distances.append(distance(feature, get_feature(df, row)))
            candidate_rows.append(row)

    smallest_indices = np.argsort(distances)[:1]
    return [df['class'][candidate_rows[i]] for i in smallest_indices]

In [50]:
# NOTE(review): absolute, machine-specific path; a path relative to the
# notebook would let other people re-run this cell.
audio_file='D:/Test/Ba1.wav'
feature=extract_feature(audio_file)
# features2.csv is the min-max scaled table written earlier in this notebook.
df=pd.read_csv('features2.csv')
# extract_neighborclass samples rows at random, so the candidate list printed
# here can differ from the one recognition() draws internally.
print(extract_neighborclass(df,feature))
print(recognition(df,feature))

[6 0 7 1 4]
[6, 0, 7, 1, 4]
[2 1 0 6 5]
['Ba']
