In [1]:
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd


In [2]:
# Path of the single sample recording used by the exploratory cells that follow.
file_audio = "../input/group10-speechprocessing/10/19021217_DinhVietAnh/untitled1.wav"

In [3]:
# Build an IPython audio object for the sample recording; as the cell's last
# expression it is what the cell displays.
ipd.Audio(file_audio)

In [4]:
# Decode the sample recording: `signal` is the waveform and `sr` the sampling
# rate returned by librosa.load (no explicit sr= is requested here).
signal, sr = librosa.load(file_audio)
print(sr)

In [5]:
# 13 MFCCs of the sample recording, using librosa's default framing parameters
# (no n_fft / hop_length overrides are passed).
mfccs = librosa.feature.mfcc(y=signal, n_mfcc=13, sr=sr)


In [6]:
# Dimensions of the MFCC matrix.
mfccs.shape

In [7]:
# Heat-map of the MFCC matrix against a time axis, drawn through the explicit
# Figure/Axes interface; the colour bar is attached to the image specshow returns.
fig, ax = plt.subplots(figsize=(12, 5))
mfcc_img = librosa.display.specshow(mfccs, x_axis="time", sr=sr, ax=ax)
fig.colorbar(mfcc_img, ax=ax, format="%+2.f")
plt.show()


In [8]:
# Delta features of the MFCC matrix (librosa.feature.delta with its default order).
delta_mfccs = librosa.feature.delta(mfccs)


In [9]:
# Second-order delta features (order=2), computed from the same MFCC matrix.
delta2_mfccs = librosa.feature.delta(mfccs, order=2)


In [10]:
# Dimensions of the delta matrix.
delta_mfccs.shape


In [11]:
# Heat-map of the first-order delta matrix against a time axis, drawn through
# the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12, 5))
delta_img = librosa.display.specshow(delta_mfccs, x_axis="time", sr=sr, ax=ax)
fig.colorbar(delta_img, ax=ax, format="%+2.f")
plt.show()


In [12]:
# Heat-map of the second-order delta matrix against a time axis, drawn through
# the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(12, 5))
delta2_img = librosa.display.specshow(delta2_mfccs, x_axis="time", sr=sr, ax=ax)
fig.colorbar(delta2_img, ax=ax, format="%+2.f")
plt.show()


In [13]:
# Stack the static, delta and delta-delta matrices along axis 0 (rows), which is
# also what np.concatenate does when no axis is given.
mfccs_features = np.concatenate([mfccs, delta_mfccs, delta2_mfccs], axis=0)


In [14]:
# Dimensions of the stacked feature matrix.
mfccs_features.shape


# Trích xuất đặc trưng MFCC

In [15]:
from collections import defaultdict
import re

# Dataset root that the extraction code walks, and the output directory.
input_path = '../input/group10-speechprocessing/10'
output_path = './'

# Vocabulary: the ten digits followed by the number words and the silence tag.
# A label's numeric id is its position in this list.
all_labels = [str(digit) for digit in range(10)] + [
    'trieu', 'nghin', 'tram', 'linh', 'muoi',
    'm1', 'tu', 'lam', 'mot', 'sil',
]

# label -> list of per-segment feature matrices / list of numeric label ids.
data, labels = defaultdict(list), defaultdict(list)
    
def export_mcfcc(file_path):
    """Compute the MFCC + delta + delta-delta feature matrix of one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 13 MFCCs, their first-order deltas and their second-order deltas
        concatenated along axis 0 and then transposed, i.e. an array with
        39 columns (presumably one row per analysis frame, given librosa's
        ``(n_mfcc, frames)`` layout).
    """
    # Every step works on the audio loaded here. The previous version read the
    # notebook-level `signal` / `mfccs` globals, so the result did not depend on
    # `file_path` at all (and raised NameError on a fresh kernel).
    y, sr = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=y, n_mfcc=13, sr=sr)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    mfccs_features = np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)
    return mfccs_features.T

def _mfcc_with_deltas(signal, sr, n_mfcc=13):
    """Return MFCCs stacked with their first- and second-order deltas along axis 0."""
    mfccs = librosa.feature.mfcc(y=signal, n_mfcc=n_mfcc, sr=sr)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    return np.concatenate((mfccs, delta_mfccs, delta2_mfccs), axis=0)


def _normalize_label(raw_label):
    """Turn a raw annotation cell into a vocabulary label."""
    label = str(raw_label).replace(" ", "").replace("\\", "")
    if label == 'nan':
        # An empty label cell is read by pandas as NaN; it is treated as silence.
        return 'sil'
    if label == 'ngin':
        # Known misspelling in the annotations.
        return 'nghin'
    return label


def export_all_mcfcc():
    """Extract per-segment features for the whole dataset into `data` / `labels`.

    Walks every sub-directory of the module-level ``input_path``. For each
    ``*.txt`` annotation file that has a matching ``*.wav`` next to it, the
    annotation is read as a header-less tab-separated table whose columns are
    used as: 0 = segment start, 1 = segment end (both handed to
    ``librosa.load`` as ``offset`` / ``duration``), 2 = label. Each segment's
    transposed MFCC + delta + delta-delta matrix is appended to
    ``data[label]`` and the label's position in ``all_labels`` is appended to
    ``labels[label]``.

    Returns
    -------
    None
        Results are accumulated in the module-level ``data`` and ``labels``.

    Raises
    ------
    ValueError
        If a normalized label is not in ``all_labels``. The check runs before
        anything is appended, so ``data`` and ``labels`` stay in step.
    """
    # sorted() makes the traversal order (and therefore the order of the
    # collected segments) independent of the file system.
    for folder_name in sorted(os.listdir(input_path)):
        folder_path = os.path.join(input_path, folder_name)
        if not os.path.isdir(folder_path):
            # Stray files at the dataset root would make os.listdir() fail.
            continue

        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith(".txt"):
                continue
            file_txt = os.path.join(folder_path, file_name)
            # Swap only the extension; str.replace would rewrite every ".txt"
            # occurring anywhere in the name.
            file_audio = os.path.splitext(file_txt)[0] + ".wav"
            if not os.path.exists(file_audio):
                continue

            print(file_txt)
            try:
                data_frame = pd.read_csv(file_txt, sep='\t', header=None)
            except pd.errors.EmptyDataError:
                # An empty annotation file contributes no segments.
                continue

            for _, row in data_frame.iterrows():
                label = _normalize_label(row[2])
                if label not in all_labels:
                    raise ValueError(
                        f"unknown label {label!r} in annotation file {file_txt}"
                    )

                start, end = float(row[0]), float(row[1])
                signal, sr = librosa.load(
                    path=file_audio,
                    offset=start,
                    duration=end - start,
                )
                # Features are computed from this segment's own audio. The
                # previous version stored the MFCCs under a misspelled name and
                # then read the notebook-level `mfccs`, so every segment
                # received the same, unrelated matrix.
                features = _mfcc_with_deltas(signal, sr)
                data[label].append(features.T)
                labels[label].append(all_labels.index(label))


In [16]:
# NOTE(review): on a fresh top-to-bottom run this executes before
# export_all_mcfcc() is called, so `labels` (a defaultdict(list)) is still empty
# and this prints [].
print(labels['trieu'])

In [17]:
# Smoke-test the single-file extractor on the sample recording; `a` holds the
# returned feature matrix.
a = export_mcfcc(file_audio)


In [18]:
# Walk the whole dataset and fill the module-level `data` / `labels` dicts;
# each processed annotation file path is printed.
export_all_mcfcc()

# HMM

In [19]:
from sklearn.model_selection import train_test_split

# Per-class hold-out split: for every label, 80 % of its segments go to the
# training set and 20 % to the test set.
X = {'train': {}, 'test': {}}
y = {'train': {}, 'test': {}}

# Report how many segments were collected for '1' rather than dumping the raw
# feature arrays into the cell output.
print(len(data['1']))
for cname in all_labels:
    x_train, x_test, y_train, y_test = train_test_split(
        data[cname], labels[cname],
        test_size=0.2,
        random_state=42,  # fixed seed: re-running the cell reproduces the same split
    )
    X['train'][cname] = x_train
    X['test'][cname] = x_test
    y['train'][cname] = y_train  # was discarded before, leaving y['train'] empty
    y['test'][cname] = y_test



In [20]:
# Sanity check: the numeric label ids collected for 'trieu'.
print(labels['trieu'])

In [21]:
# One line per label: name, number of training segments, number of test
# segments, number of test label ids.
for cname in all_labels:
    n_train = len(X['train'][cname])
    n_test = len(X['test'][cname])
    n_test_labels = len(y['test'][cname])
    print(cname, n_train, n_test, n_test_labels)


In [22]:
import hmmlearn.hmm as hmm

N_STATES = 9      # hidden states per word model
STAY_PROB = 0.5   # initial self-transition probability of every non-final state

# One entry per label, derived from all_labels so the two cannot drift apart
# (the hand-written list had 21 entries for 20 labels).
states = [N_STATES] * len(all_labels)
model = {}
print(len(all_labels))
for idx, cname in enumerate(all_labels):
    print(cname)
    n_states = states[idx]

    # Left-to-right topology: always start in state 0; each state either stays
    # or advances to the next one; the last state is absorbing.
    start_prob = np.zeros(n_states)
    start_prob[0] = 1.0
    trans_matrix = (STAY_PROB * np.eye(n_states)
                    + (1 - STAY_PROB) * np.eye(n_states, k=1))
    trans_matrix[-1, -1] = 1.0

    #trans matrix
    print(trans_matrix)

    # init_params='mc' lets hmmlearn initialise only means and covariances, so
    # the start/transition values assigned below are the ones EM starts from.
    # They used to be passed as startprob_prior / transmat_prior, which hmmlearn
    # treats as Dirichlet prior parameters, not as initial values, so the
    # left-to-right structure was never actually imposed.
    model[cname] = hmm.GaussianHMM(
        n_components=n_states,
        verbose=True,
        n_iter=300,
        params='stmc',
        init_params='mc',
        random_state=42
    )
    model[cname].startprob_ = start_prob
    model[cname].transmat_ = trans_matrix

    # hmmlearn expects all sequences stacked row-wise plus the length of each one.
    train_segments = X['train'][cname]
    model[cname].fit(
        X=np.vstack(train_segments),
        lengths=[segment.shape[0] for segment in train_segments],
    )

In [None]:
import os
import pickle

# Create the output directory idempotently: re-running the cell no longer
# triggers the "File exists" error of `!mkdir`, and no shell is required.
os.makedirs('models_train', exist_ok=True)

# Save one pickle per label. The former `cname == 'le'` guard was dropped
# because 'le' is not part of all_labels and the branch could never run.
for cname in all_labels:
    print(cname)
    name = f'models_train/model_{cname}.pkl'
    with open(name, 'wb') as file:
        pickle.dump(model[cname], file)

In [None]:
import pickle

# Reload every persisted per-label model into `model_train`, keyed by label.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files written by this notebook or another trusted source.
model_train = {}
for key in all_labels:
    name = f"models_train/model_{key}.pkl"
    with open(name, 'rb') as file:
        model_train[key] = pickle.load(file)

In [None]:
# Bundle the models_train directory into models.zip (shell command; requires
# the `zip` executable to be available).
!zip -r models.zip models_train