In [8]:
import numpy as np
from matplotlib import pyplot as plt
import librosa.display
import os
from scipy.io import wavfile
import warnings
from time import time
from librosa.feature import mfcc
from hmmlearn import hmm
import soundfile
import lab04_jwJang
import lab05_jwJang

# Silence every warning for the rest of the process (all categories, all modules).
warnings.filterwarnings("ignore")

def loadncheck_audio_file(filename, path_data, basesr) :
    """Load an audio file and check that it is mono at the expected sample rate.

    Args:
        filename: Name of the audio file inside ``path_data``.
        path_data: Directory that contains the file.
        basesr: Required sample rate in Hz.

    Returns:
        ``(wav, ok)``. ``wav`` is the signal as returned by ``librosa.load``
        with ``sr=None, mono=False`` (no resampling, no down-mix). ``ok`` is
        True only when the file's rate equals ``basesr`` and ``wav`` is 1-D.
    """
    wav, Fs = librosa.load(os.path.join(path_data, filename), sr=None, mono=False)
    if Fs != basesr:
        # Report the rate that was actually requested rather than a fixed
        # "16,000"; for basesr=16000 the printed text is unchanged.
        print(f'sample rate is not {basesr:,}. wav:{Fs}')
        return wav, False
    if wav.ndim != 1:
        print(f"not mono file {wav.shape}")
        return wav, False
    return wav, True


###################################################################
# from https://github.com/gjang7/ssp2023/blob/main/proj1/proj1_nidr.ipynb
# The HMM training code below is the code provided by the professor.
###################################################################

# add '/' if path is not a null string
def addpath(path, file):
    """Return ``path + '/' + file``, or just ``file`` when ``path`` is empty."""
    if len(path):
        return path + '/' + file
    return file

# extract MFCC features
def extmfcc(file):
    """Read a wav file and return its MFCC sequence, one row per frame.

    The hop is ``samplerate // 100`` samples and the analysis window is twice
    the hop. The number of coefficients comes from the module-level
    ``num_mfcc``.

    Args:
        file: Path of the wav file to read.

    Returns:
        The MFCC matrix with its two axes swapped, so frames run along axis 0.
    """
    sr, samples = wavfile.read(file)
    frame_shift = sr // 100
    coeffs = mfcc(
        y=np.float32(samples),  # cast only; the samples are not rescaled
        sr=sr,
        n_mfcc=num_mfcc,
        hop_length=frame_shift,
        win_length=frame_shift * 2,
    )
    return coeffs.transpose(1, 0)

def initByBakis(inumstates, ibakisLevel):
    """Build the start-probability and transition priors for a Bakis HMM.

    The first ``ibakisLevel - 1`` states share the start mass equally and all
    other states get zero. The transition prior is delegated to
    ``getTransmatPrior``.

    Args:
        inumstates: Number of hidden states.
        ibakisLevel: Bakis level; ``ibakisLevel - 1`` must be non-zero because
            it is used as a divisor.

    Returns:
        ``(startprobPrior, transmatPrior)``.
    """
    n_start = ibakisLevel - 1
    startprobPrior = np.zeros(inumstates)
    startprobPrior[:n_start] = 1 / float(n_start)
    return startprobPrior, getTransmatPrior(inumstates, ibakisLevel)

def getTransmatPrior(inumstates, ibakisLevel):
    """Return a left-to-right (Bakis) transition prior whose rows sum to 1.

    State ``i`` may stay where it is or move forward by up to
    ``ibakisLevel - 1`` states, each with probability ``1 / ibakisLevel``.
    The last ``ibakisLevel - 1`` states have fewer successors, so their mass
    is spread uniformly over the states from ``i`` to the end.

    Args:
        inumstates: Number of hidden states.
        ibakisLevel: Number of reachable states per row, self-loop included.

    Returns:
        An ``(inumstates, inumstates)`` float array.
    """
    transmatPrior = (1 / float(ibakisLevel)) * np.eye(inumstates)

    # Rows that have the full set of ibakisLevel - 1 forward neighbours.
    for i in range(inumstates - (ibakisLevel - 1)):
        for j in range(ibakisLevel - 1):
            transmatPrior[i, i + j + 1] = 1. / ibakisLevel

    # Tail rows. The bound used to depend on the stale `j` left over from the
    # loop above, which dropped columns (rows no longer summed to 1) whenever
    # ibakisLevel > 2. The start is clamped so a level larger than the state
    # count cannot produce negative row indices.
    tail_start = max(inumstates - ibakisLevel + 1, 0)
    for i in range(tail_start, inumstates):
        transmatPrior[i, i:] = 1. / (inumstates - i)

    return transmatPrior


##############################################################################################
# hyperparameters - CHANGE THEM TO IMPROVE PERFORMANCE
# These are module-level globals read by extmfcc() and SpeechModel.
# 1. number of MFCC (feature dimension)
# Previously tried values, kept for reference:
#num_mfcc = 6
#num_mfcc = 10
num_mfcc = 13
# 2. Parameters needed to train GMMHMM
m_num_of_HMMStates = 3  # number of states
m_num_of_mixtures = 2  # number of mixtures for each hidden state
m_covarianceType = 'diag'  # covariance type
m_n_iter = 10  # number of iterations
m_bakisLevel = 2  # passed to initByBakis as ibakisLevel
# The priors are computed once at import time and shared by every SpeechModel.
m_startprobPrior, m_transmatPrior = initByBakis(m_num_of_HMMStates,m_bakisLevel)
# Printed at import time so the priors in use are visible in the run log.
print("StartProbPrior="); print(m_startprobPrior)
print("TransMatPrior="); print(m_transmatPrior)


############################################################################################## 
# acoustic model definition
class SpeechModel:
    """One word class together with its GMM-HMM and its pooled training frames.

    The model is configured from the module-level hyperparameters
    (``m_num_of_HMMStates``, ``m_num_of_mixtures``, ``m_transmatPrior``,
    ``m_startprobPrior``, ``m_covarianceType``, ``m_n_iter``).
    """

    def __init__(self,Class,label):
        # Class: the index given to this model by the caller.
        self.Class = Class
        # label: the word this model represents, as given by the caller.
        self.label = label
        # Starts as an empty (0, num_mfcc) matrix; callers append frames to it.
        self.traindata = np.zeros((0, num_mfcc))
        self.model = hmm.GMMHMM(
            n_components=m_num_of_HMMStates,
            n_mix=m_num_of_mixtures,
            transmat_prior=m_transmatPrior,
            startprob_prior=m_startprobPrior,
            covariance_type=m_covarianceType,
            n_iter=m_n_iter,
        )

##################################################################################
# folder structure:
#  ${rootpath} / ${speaker_name} / m:0-9 / ${tag}[t:0-${numtrials}]-[m:0-9]
#    m:0-9 model number
#    t:0-{numtrials} trial number
#  example: train_digits('segmented-train', {'gjang', 'do', 'son'}, 'kdigits', 10)
#           will train with
#    segmented-train/gjang/0/kdigits0-0.wav
#    segmented-train/gjang/0/kdigits1-0.wav
#    ...
#    segmented-train/son/9/kdigits8-9.wav
#    segmented-train/son/9/kdigits9-9.wav
##################################################################################
def train_digits(rootpath, speakers, tag, num_trials=10):
    """Train one GMM-HMM per digit from segmented recordings.

    Files are read from
    ``{rootpath}/{speaker}/{digit}/{tag}{trial}-{digit}.wav`` for every
    speaker, every digit 0-9 and every trial in ``range(num_trials)``.

    Args:
        rootpath: Root folder of the segmented data set.
        speakers: Iterable of speaker folder names.
        tag: File-name prefix, e.g. ``'kdigits'``.
        num_trials: Number of recordings per speaker and digit.

    Returns:
        ``(speechmodels, gmmhmmindexdict)``: the list of trained
        ``SpeechModel`` objects and a dict mapping each digit string to its
        position in that list.
    """
    ##############################################################################################
    # 1. find files and extract their MFCC features
    spoken = []
    m_trainingsetfeatures = []
    m_trainingsetlabels = []

    count = 0
    for username in speakers:
        apath2 = addpath(rootpath, username)    # example: segmented/gjang
        for ii in range(10):
            dnum = str(ii)
            apath3 = addpath(apath2, dnum)     # example: segmented/gjang/2
            if dnum not in spoken:
                spoken.append(dnum)
            for trial in range(num_trials):
                file = addpath(apath3, "{}{}-{}.wav".format(tag, trial, dnum))   # segmented/gjang/2/kdigits0-2.wav
                mc = extmfcc(file)

                # display file names for the first 20 files only
                count += 1
                if count <= 20:
                    print(file, dnum, end=' '); print(mc.shape, end=' ')
                elif count == 21:
                    print('...'); print('')

                m_trainingsetfeatures.append(mc)
                m_trainingsetlabels.append(dnum)

    print('Words spoken:', spoken)

    ##############################################################################################
    print("[training] number of labels and features = %d, %d" %
            ( len(m_trainingsetlabels), len(m_trainingsetfeatures)) )
    print ('Loading data completed')

    ##############################################################################################
    # model initialization: word -> index of its model in `speechmodels`
    gmmhmmindexdict = {word: index for index, word in enumerate(spoken)}

    ##############################################################################################
    # training GMMHMM Models
    start = time()

    speechmodels = [SpeechModel(index, word) for word, index in gmmhmmindexdict.items()]

    # Group the utterances by word in one pass instead of scanning every model
    # for every utterance and re-concatenating the growing array each time.
    sequences_by_model = [[] for _ in speechmodels]
    for mc, label in zip(m_trainingsetfeatures, m_trainingsetlabels):
        sequences_by_model[gmmhmmindexdict[label]].append(mc)

    for speechmodel, sequences in zip(speechmodels, sequences_by_model):
        speechmodel.traindata = np.concatenate([speechmodel.traindata] + sequences)
        # hmmlearn treats X as a single sequence unless `lengths` is given.
        # Each recording is an independent utterance, so pass the boundaries.
        lengths = [len(seq) for seq in sequences]
        speechmodel.model.fit(speechmodel.traindata, lengths=lengths)

    print ('Training completed -- {0} GMM-HMM models are built for {0} different types of words'.format(len(spoken)))
    print('time elapsed: %.2f seconds' % ( time() - start ))
    print (" "); print(" ")

    return speechmodels, gmmhmmindexdict

def validation_digits(speechmodels, gmmhmmindexdict, rootpath, speakers, tag, num_trials=10):
    """Score recordings against trained models and return the accuracy in percent.

    Files are read from
    ``{rootpath}/{speaker}/{digit}/{tag}{trial}-{digit}.wav`` for every
    speaker, every digit 0-9 and every trial in ``range(num_trials)``. Each
    utterance is assigned to the model with the highest ``score``.

    Args:
        speechmodels: Trained models, as returned by ``train_digits``.
        gmmhmmindexdict: Dict mapping a digit string to its model's ``Class``.
        rootpath: Root folder of the data set to evaluate.
        speakers: Iterable of speaker folder names.
        tag: File-name prefix, e.g. ``'kdigits'``.
        num_trials: Number of recordings per speaker and digit.

    Returns:
        Percentage of correctly recognised utterances, or ``0.0`` when there
        is nothing to evaluate.
    """
    ##############################################################################################
    # 1. find files and extract their MFCC features
    spoken = []
    m_features = []
    m_labels = []

    count = 0
    for username in speakers:
        apath2 = addpath(rootpath, username)    # example: segmented/gjang
        for ii in range(10):
            dnum = str(ii)
            apath3 = addpath(apath2, dnum)     # example: segmented/gjang/2
            if dnum not in spoken:
                spoken.append(dnum)
            for trial in range(num_trials):
                file = addpath(apath3, "{}{}-{}.wav".format(tag, trial, dnum))   # segmented/gjang/2/kdigits0-2.wav
                mc = extmfcc(file)

                # display file names for the first 20 files only
                count += 1
                if count <= 20:
                    print(file, dnum, end=' '); print(mc.shape, end=' ')
                elif count == 21:
                    print('...'); print('')

                m_features.append(mc)
                m_labels.append(dnum)

    print('Words spoken:', spoken)

    ##############################################################################################
    print("[validation] number of labels and features = %d, %d" % ( len(m_labels), len(m_features)) )
    print ('Loading data completed')

    ##############################################################################################
    # testing: pick the best-scoring model for each utterance
    print("Prediction started")
    m_PredictionlabelList = []
    for features in m_features:
        scores = [speechmodel.model.score(features) for speechmodel in speechmodels]
        best = scores.index(max(scores))   # first maximum wins on ties
        m_PredictionlabelList.append(speechmodels[best].Class)

    print("")
    print("Prediction for Testing DataSet:")

    correct = sum(
        1 for label, predicted in zip(m_labels, m_PredictionlabelList)
        if gmmhmmindexdict[label] == predicted
    )
    # An empty evaluation set used to raise ZeroDivisionError here.
    accuracy = 100.0 * correct / float(len(m_labels)) if m_labels else 0.0

    print("")
    print("accuracy ="+str(accuracy))
    print("")
    return accuracy


##############################################
# End of the code provided by the professor.
##############################################
# gamma to VUV
def soft_VAD(gamma, threshold= 0.5) :
    """Threshold per-frame values into a 0.0/1.0 decision vector.

    Args:
        gamma: Sequence of per-frame values.
        threshold: A frame is marked 1.0 only when its value is strictly
            greater than this.

    Returns:
        A float array of the same length as ``gamma`` holding 1.0 or 0.0.
    """
    return np.array([1.0 if g > threshold else 0.0 for g in gamma], dtype=float)


# Find the longest voiced run in order to locate the speech segment
def long_term(vuv, Ns) :
    """Locate the longest run of frames flagged 1.0, in sample indices.

    Args:
        vuv: Per-frame flags; 1.0 extends a run and 0.0 ends it.
        Ns: Number of samples per frame.

    Returns:
        ``(start_point, end_point)``: the first and last sample index of the
        longest run (the earliest one on ties), or ``(0, 0)`` when no frame is
        flagged.
    """
    max_len = 0
    start_point = 0
    end_point = 0
    checking = False          # True while inside a run of 1.0 frames
    last = len(vuv) - 1

    for i, flag in enumerate(vuv):
        if flag == 1.0:
            if checking:
                temp_len = temp_len + Ns
            else:
                checking = True
                temp_len = Ns
                temp_start = i * Ns

        # A run ends on an unvoiced frame or when the signal itself ends.
        if checking and (flag == 0.0 or i == last):
            checking = False
            if temp_len > max_len:
                max_len = temp_len
                start_point = temp_start
                if flag == 0.0:
                    # Frame i is unvoiced, so the run stops at the end of
                    # frame i-1. This used to be extended by one frame
                    # whenever i was also the last frame.
                    end_point = (i * Ns) - 1
                else:
                    # The run reaches the final frame: include it fully.
                    end_point = ((i + 1) * Ns) - 1

    return start_point, end_point

# Code reused from lab04: judges the silent margin of the recording and treats roughly the first 0.1 s as noise
def lab04 (x, Tf, Ts, Fs, order, op_Filter) :
    """Trim ``x`` to its detected speech segment using the lab04 end-point detector.

    Detection runs twice: first against a noise estimate from
    ``Energy_Noise`` with ``Tn = 0.1``, then against ``All_Noise_Avg``
    computed outside the first detected segment. The detected range is
    widened by 40 ms on each side. The waveform and the per-sample decision
    track are drawn on the current matplotlib figure.

    Args:
        x: Input signal.
        Tf, Ts: Timing parameters forwarded to the lab04 helpers
            (``int(Ts*Fs)`` is used here as the number of samples per frame).
        Fs: Sample rate in Hz.
        order: Forwarded to ``FIR_process``.
        op_Filter: When truthy, return the FIR-processed segment instead of
            the raw one.

    Returns:
        ``x`` unchanged when the second detection reports ``(0, 0)``.
        Otherwise the trimmed segment, filtered if ``op_Filter`` is truthy.
    """
    # Margin kept on each side for unvoiced consonants and fade-out (40 ms).
    margin = int(0.04*Fs)
    last_index = len(x)-1

    # Pass 1: rough end points from an energy-based noise estimate.
    noise_energy = lab04_jwJang.Energy_Noise(x, Tf, 0.1, Ts, Fs)
    _, _, seg_begin, seg_end = lab04_jwJang.EPD(x, Tf, Ts, Fs, noise_energy, 1.0)
    noise_avg = lab04_jwJang.All_Noise_Avg(
        x, Tf, Ts, Fs,
        np.maximum(seg_begin - margin, 0),
        np.minimum(seg_end + margin, last_index),
    )

    # Pass 2: repeat the detection against the averaged noise estimate.
    _, frame_flags, seg_begin, seg_end = lab04_jwJang.EPD(x, Tf, Ts, Fs, noise_avg, 1.2)

    # Everything was judged to be noise: hand the signal back untouched.
    if seg_begin == 0.0 and seg_end == 0.0:
        return x

    start_point = np.maximum(seg_begin - margin, 0)
    end_point = np.minimum(seg_end + margin, last_index)
    noise_avg = lab04_jwJang.All_Noise_Avg(x, Tf, Ts, Fs, start_point, end_point)

    # Expand the per-frame flags to one value per sample for plotting.
    hop = int(Ts*Fs)
    track = np.zeros(len(x), dtype=float)
    for i, flag in enumerate(frame_flags):
        track[i*hop:(i+1)*hop] = flag
    plt.plot(x)
    plt.plot(track)

    segment = x[start_point:end_point]
    if op_Filter:
        # Filter the segment using the noise estimate from the final pass.
        return lab04_jwJang.FIR_process(segment, Tf, Ts, Fs, noise_avg, order)
    return segment



def lab05_time_domain(x, Tf, Ts, Fs, order, op_Filter, epochs=10) :
    """Trim ``x`` to its longest voiced run using the lab05 time-domain VAD.

    Per-frame energies are passed to ``VAD_time``. The result is thresholded,
    median-filtered and searched for the longest voiced run, which is widened
    by 40 ms on each side. The waveform and the per-sample decision track are
    drawn on the current matplotlib figure.

    Args:
        x: Input signal.
        Tf, Ts: Timing parameters forwarded to the lab05 helpers
            (``int(Ts*Fs)`` is used here as the number of samples per frame).
        Fs: Sample rate in Hz.
        order: Forwarded to ``wf_process``.
        op_Filter: When truthy, return the ``wf_process`` output instead of
            the raw signal.
        epochs: Forwarded to ``VAD_time``.

    Returns:
        ``x`` with 0.2 s removed from each end when no voiced run is found.
        Otherwise the trimmed segment, filtered if ``op_Filter`` is truthy.
    """
    frames, power_spec = lab05_jwJang.short_time(x, Tf, Ts, Fs)
    frame_energy = np.mean(frames*frames, axis=-1)

    gamma = lab05_jwJang.VAD_time(frame_energy, epochs=epochs, dispstep = 0)

    frame_flags = lab04_jwJang.midian_filter(soft_VAD(gamma))

    # Expand the per-frame flags to one value per sample for plotting.
    hop = int(Ts*Fs)
    track = np.zeros(len(x), dtype=float)
    for i, flag in enumerate(frame_flags):
        track[i*hop:(i+1)*hop] = flag
    plt.plot(x)
    plt.plot(track)

    seg_begin, seg_end = long_term(frame_flags, hop)

    # Everything was judged to be noise: only drop 0.2 s from each end.
    if seg_begin == 0.0 and seg_end == 0.0:
        return x[int(0.2*Fs): -int(0.2*Fs)]

    # Margin kept on each side for unvoiced consonants and fade-out (40 ms).
    margin = int(0.04*Fs)
    start_point = np.maximum(seg_begin - margin, 0)
    end_point = np.minimum(seg_end + margin, len(x)-1)

    if not op_Filter:
        return x[start_point:end_point]

    # Noise estimate: average of power_spec weighted by (1 - gamma).
    Nest = np.dot(1-gamma, power_spec)/np.sum(1-gamma)
    x_hat = lab05_jwJang.wf_process(power_spec, Nest, x, Ts, Tf, Fs, order)
    return x_hat[start_point:end_point]

def lab05_dual_Rayleigh(x, Tf, Ts, Fs, order, op_Filter, epochs=10) :
    """Trim ``x`` to its longest voiced run using the lab05 dual-Rayleigh VAD.

    ``VAD_dual_Rayleigh`` is run on the short-time output. Its first result is
    thresholded, median-filtered and searched for the longest voiced run,
    which is widened by 40 ms on each side. The waveform and the per-sample
    decision track are drawn on the current matplotlib figure.

    Args:
        x: Input signal.
        Tf, Ts: Timing parameters forwarded to the lab05 helpers
            (``int(Ts*Fs)`` is used here as the number of samples per frame).
        Fs: Sample rate in Hz.
        order: Forwarded to ``wf_process``.
        op_Filter: When truthy, return the ``wf_process`` output instead of
            the raw signal.
        epochs: Forwarded to ``VAD_dual_Rayleigh``.

    Returns:
        ``x`` with 0.2 s removed from each end when no voiced run is found.
        Otherwise the trimmed segment, filtered if ``op_Filter`` is truthy.
    """
    _, power_spec = lab05_jwJang.short_time(x, Tf, Ts, Fs)

    gamma, sigmasq_N = lab05_jwJang.VAD_dual_Rayleigh(power_spec, epochs=epochs, dispstep = 0)

    frame_flags = lab04_jwJang.midian_filter(soft_VAD(gamma))

    # Expand the per-frame flags to one value per sample for plotting.
    hop = int(Ts*Fs)
    track = np.zeros(len(x), dtype=float)
    for i, flag in enumerate(frame_flags):
        track[i*hop:(i+1)*hop] = flag
    plt.plot(x)
    plt.plot(track)

    seg_begin, seg_end = long_term(frame_flags, hop)

    # Everything was judged to be noise: only drop 0.2 s from each end.
    if seg_begin == 0.0 and seg_end == 0.0:
        return x[int(0.2*Fs): -int(0.2*Fs)]

    # Margin kept on each side for unvoiced consonants and fade-out (40 ms).
    margin = int(0.04*Fs)
    start_point = np.maximum(seg_begin - margin, 0)
    end_point = np.minimum(seg_end + margin, len(x)-1)

    if not op_Filter:
        return x[start_point:end_point]

    # The second value returned by the VAD is passed on as the noise estimate.
    x_hat = lab05_jwJang.wf_process(power_spec, sigmasq_N, x, Ts, Tf, Fs, order)
    return x_hat[start_point:end_point]


def lab05_log_frequency(x, Tf, Ts, Fs, order, op_Filter, epochs=10) :
    """Trim ``x`` to its longest voiced run using the lab05 log-frequency VAD.

    ``VAD_log_frequency`` is run on the short-time output. Its first result is
    thresholded, median-filtered and searched for the longest voiced run,
    which is widened by 40 ms on each side. The waveform and the per-sample
    decision track are drawn on the current matplotlib figure.

    Args:
        x: Input signal.
        Tf, Ts: Timing parameters forwarded to the lab05 helpers
            (``int(Ts*Fs)`` is used here as the number of samples per frame).
        Fs: Sample rate in Hz.
        order: Forwarded to ``wf_process``.
        op_Filter: When truthy, return the ``wf_process`` output instead of
            the raw signal.
        epochs: Forwarded to ``VAD_log_frequency``.

    Returns:
        ``x`` with 0.2 s removed from each end when no voiced run is found.
        Otherwise the trimmed segment, filtered if ``op_Filter`` is truthy.
    """
    _, X2_w = lab05_jwJang.short_time(x, Tf, Ts, Fs)

    gamma, mu_N = lab05_jwJang.VAD_log_frequency(X2_w,epochs=epochs,dispstep = 0)

    VUV = soft_VAD(gamma)
    VUV_mid = lab04_jwJang.midian_filter(VUV)

    # Expand the per-frame flags to one value per sample for plotting.
    hop = int(Ts*Fs)
    vuv_mid = np.zeros(len(x), dtype=float)
    for i, flag in enumerate(VUV_mid):
        vuv_mid[i*hop:(i+1)*hop] = flag
    plt.plot(x)
    plt.plot(vuv_mid)

    s_point, e_point = long_term(VUV_mid, int(Fs*Ts))

    # Everything was judged to be noise. Without this guard the function
    # returned only the first 40 ms of the signal; fall back the same way
    # lab05_time_domain and lab05_dual_Rayleigh do.
    if s_point == 0.0 and e_point == 0.0 :
        return x[int(0.2*Fs): -int(0.2*Fs)]

    # Margin kept on each side for unvoiced consonants and fade-out (40 ms).
    padding = 0.04
    start_point = np.maximum(s_point - int(padding*Fs), 0)
    end_point = np.minimum(e_point + int(padding*Fs), len(x)-1)

    if op_Filter :
        Nest = np.exp(2*mu_N)
        x_hat = lab05_jwJang.wf_process(X2_w, Nest, x, Ts, Tf, Fs, order)
        return x_hat[start_point:end_point]
    else :
        return x[start_point:end_point]


def process_files(from_path,to_path,wav_para,speakers,VAD='EPD',op_Filter=True) :
    """Run a VAD (and optionally its filter) over every wav of the given speakers.

    For each ``{from_path}/{speaker}/{label}/*wav`` the trimmed signal is
    written to the same relative location under ``to_path``, next to a
    ``.png`` plot of the waveform and the detection track.

    Args:
        from_path: Root folder of the input recordings.
        to_path: Root folder for the output; created as needed.
        wav_para: Dict with the keys ``'sr'``, ``'Tf'``, ``'Ts'`` and
            ``'order'``.
        speakers: Iterable of speaker folder names.
        VAD: One of ``'EPD'``, ``'time_domain'``, ``'dual_Rayleigh'`` or
            ``'log_frequency'``. With any other value the files are listed
            but nothing is written.
        op_Filter: Forwarded to the detector.
    """
    # Detectors that are fed a signal padded by 0.2 s on each side.
    padded_detectors = {
        'time_domain': lab05_time_domain,
        'dual_Rayleigh': lab05_dual_Rayleigh,
        'log_frequency': lab05_log_frequency,
    }

    for speaker in speakers :
        speaker_dir = os.path.join(from_path, speaker)
        for label in os.listdir(speaker_dir) :
            src_dir = os.path.join(speaker_dir, label)
            if not os.path.isdir(src_dir) :
                continue
            dst_dir = os.path.join(to_path, speaker, label)
            # Replaces a `mkdir -p` shell call: no shell quoting issues with
            # unusual path characters and no dependency on a POSIX shell.
            os.makedirs(dst_dir, exist_ok=True)
            for num in os.listdir(src_dir) :
                if not num.endswith('wav') :
                    continue
                src_file = os.path.join(src_dir, num)
                print(src_file)
                wav, Fs = librosa.load(src_file, sr = wav_para['sr'])

                if VAD == 'EPD' :
                    detector = lab04
                elif VAD in padded_detectors :
                    detector = padded_detectors[VAD]
                    # Pad 0.2 s on each side with the value of the first sample.
                    edge = wav[0]*np.ones(int(0.2*Fs))
                    wav = np.concatenate((edge, wav, edge))
                else :
                    # Unknown mode: keep the previous behaviour of writing nothing.
                    continue

                plt.figure()
                try :
                    ch_wav = detector(wav, wav_para['Tf'], wav_para['Ts'], Fs, wav_para['order'], op_Filter)
                    plt.savefig(os.path.join(dst_dir, f'{num[:-4]}.png'))
                finally :
                    # Close the figure even if detection fails, so figures do
                    # not pile up over a long batch.
                    plt.close()
                soundfile.write(os.path.join(dst_dir, num), ch_wav, Fs)


def mkdata_train_test(trainroot, valroot, testroot, to_path, wav_para, f, op_filter = False, val_op = 'org', mk_option='all') :
    """Optionally build the processed data sets, then train and report accuracies.

    Output folders are named ``{root}-{to_path}`` with ``-filter`` appended
    when ``op_filter`` is truthy. The validation and test sets use the
    ``val_op`` sub-folder. Speaker lists come from the module-level
    ``labels_train``, ``labels_val`` and ``labels_test``.

    Args:
        trainroot, valroot, testroot: Root folders of the source data sets.
        to_path: VAD mode name. It is used both in the output folder names
            and as the ``VAD`` argument of ``process_files``.
        wav_para: Parameter dict forwarded to ``process_files``.
        f: Open writable text file that receives the accuracy lines.
        op_filter: Forwarded to ``process_files`` as ``op_Filter``.
        val_op: Sub-folder of the validation/test roots to use.
        mk_option: ``'all'``, ``'train'``, ``'val'`` or ``'test'`` selects
            which sets are regenerated; any other value regenerates none.
    """
    suffix = '-filter' if op_filter else ''
    to_train_path = f'{trainroot}-{to_path}{suffix}'
    to_val_path = f'{valroot}-{to_path}{suffix}'
    to_test_path = f'{testroot}-{to_path}{suffix}'

    valclean = addpath(valroot, val_op)
    to_valclean = addpath(to_val_path, val_op)
    testclean = addpath(testroot, val_op)
    to_testclean = addpath(to_test_path, val_op)

    # (stage name, source folder, destination folder, speakers)
    stages = (
        ('train', trainroot, to_train_path, labels_train),
        ('val', valclean, to_valclean, labels_val),
        ('test', testclean, to_testclean, labels_test),
    )
    for stage, src, dst, who in stages:
        if mk_option in ('all', stage):
            print(f'Start {stage}_EPD {src} to {dst}')
            process_files(src, dst, wav_para, who, VAD=to_path, op_Filter=op_filter)

    print(f'\nnoise:{val_op} :: Apply {to_path} - filter:{op_filter}\n')
    f.write(f'\nApply {to_path} - filter:{op_filter}\n')

    # Train on the processed training set, then score all three sets.
    sp_models, indexdict = train_digits(to_train_path, labels_train, 'kdigits', num_trials=10)
    reports = (
        ('train', to_train_path, labels_train),
        ('validation', to_valclean, labels_val),
        ('test', to_testclean, labels_test),
    )
    for set_name, root, who in reports:
        acc = validation_digits(sp_models, indexdict, root, who, 'kdigits', num_trials=10)
        f.write(f'test {set_name}_set accuracy :{acc}\n')


# Basic data set information.
## Speakers whose recordings looked wrong are commented out.
# NOTE: the speaker collections are sets, so their iteration order is not fixed.
trainroot = 'segmented-train'
labels_train = {'11jeonghy',
                'Dandyst',
                #'deokkyukwon',
                'InkooJeon',
                #'ohjihyeon',
                #'shin3875',
                'son',
                'YouYeNa',
               }

valroot = 'segmented-val'
valclean = addpath(valroot, 'org')
labels_val = {
                'chlee',
                'do',
                #'kyeong',
               }

testroot = 'unsegmented-test'
# Built from testroot; it previously used valroot (copy-paste slip), which
# made it a duplicate of valclean.
testclean = addpath(testroot, 'org')
# Single definition: the same set used to be assigned twice.
labels_test = {
                'gjang',
               }


testroot_seg = 'segmented-test'
testclean_seg = addpath(testroot_seg, 'org')

def main() :
    """Run the baseline and every VAD / filter / noise-condition experiment.

    Results are written to ``result/result_{k}.txt``, where ``k`` is one more
    than the number of entries already in ``result/``. The baseline trains and
    scores on the unprocessed sets. Every entry of ``val_list`` is then
    evaluated with each VAD, first without and then with filtering, on data
    that must already exist because ``mk_option='none'`` regenerates nothing.
    """
    # os.makedirs replaces a `mkdir -p` shell call (portable, no shell needed).
    os.makedirs('result', exist_ok=True)
    num_result = len(os.listdir('result'))+1

    # wav parameter option
    wav_para = {
        'Ts': 0.01,
        'Tf': 0.02,
        'sr': 16000,
        'order': 62,
    }
    val_list = ['nbnSNR-10','nbnSNR0','nbnSNR10','org','wbnSNR-10','wbnSNR0','wbnSNR10']
    vad_modes = ('EPD', 'time_domain', 'dual_Rayleigh')

    # The context manager closes the result file even if a run fails part-way.
    with open(f'result/result_{num_result}.txt', 'w') as f:
        speechmodels, gmmhmmindexdict = train_digits(trainroot, labels_train, 'kdigits', num_trials=10)
        acc = validation_digits(speechmodels, gmmhmmindexdict, trainroot, labels_train, 'kdigits', num_trials=10)
        f.write(f'test train_set accuracy :{acc}\n')
        acc = validation_digits(speechmodels, gmmhmmindexdict, valclean, labels_val, 'kdigits', num_trials=10)
        f.write(f'test validation_set accuracy :{acc}\n')

        # Same order as before: all VADs unfiltered, then all VADs filtered.
        for val_option in val_list :
            for use_filter in (False, True) :
                for vad in vad_modes :
                    mkdata_train_test(trainroot, valroot, testroot_seg, vad, wav_para, f,
                                      op_filter = use_filter, val_op = val_option, mk_option='none')

        # log_frequency is left out: it failed to locate the speech segment,
        # judged everything as noise, and the wav content was lost.
        #mkdata_train_test(trainroot, valroot, 'log_frequency', wav_para, f, op_filter = False, val_op = 'org', mk_option=False)


# Run the experiments only when executed as a script, not when imported.
if __name__ == '__main__' :
    main()

StartProbPrior=
[1. 0. 0.]
TransMatPrior=
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
segmented-train/InkooJeon/0/kdigits0-0.wav 0 (255, 13) segmented-train/InkooJeon/0/kdigits1-0.wav 0 (233, 13) segmented-train/InkooJeon/0/kdigits2-0.wav 0 (265, 13) segmented-train/InkooJeon/0/kdigits3-0.wav 0 (240, 13) segmented-train/InkooJeon/0/kdigits4-0.wav 0 (249, 13) segmented-train/InkooJeon/0/kdigits5-0.wav 0 (263, 13) segmented-train/InkooJeon/0/kdigits6-0.wav 0 (272, 13) segmented-train/InkooJeon/0/kdigits7-0.wav 0 (263, 13) segmented-train/InkooJeon/0/kdigits8-0.wav 0 (254, 13) segmented-train/InkooJeon/0/kdigits9-0.wav 0 (230, 13) segmented-train/InkooJeon/1/kdigits0-1.wav 1 (250, 13) segmented-train/InkooJeon/1/kdigits1-1.wav 1 (242, 13) segmented-train/InkooJeon/1/kdigits2-1.wav 1 (248, 13) segmented-train/InkooJeon/1/kdigits3-1.wav 1 (252, 13) segmented-train/InkooJeon/1/kdigits4-1.wav 1 (228, 13) segmented-train/InkooJeon/1/kdigits5-1.wav 1 (257, 13) segmented-train/InkooJeon/1/kdig