## 预处理

读取数据并处理后划分训练测试集
* train：10个列表，每个列表存储了前14位说话人说这个单词的每一帧
* test：其余两位说话人的数据

In [26]:
import os
import librosa
import numpy as np

# Build the train / test MFCC data sets from the recorded digits and save them.
#
# One folder per digit (digit_0 ... digit_9).  For every digit, all recordings
# except the last N_TEST_FILES go to the training set; the last N_TEST_FILES
# recordings form the test set.
RECORDS_ROOT = 'C:/Users/Rhodia/Desktop/records/records/digit_'
N_TEST_FILES = 2


def _digit_files(number):
    """Return the recording paths of one digit in a deterministic order."""
    mydir = RECORDS_ROOT + str(number) + '/'
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # train/test split reproducible and guarantees the two parts never overlap.
    return [mydir + name for name in sorted(os.listdir(mydir))]


def _mfcc_frames(url):
    """Load one recording at 16 kHz and return its 12 MFCCs, one frame per row."""
    y, sr = librosa.load(url, sr=16000)
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12).T


# generate the dataset of train and save it
# train[d] is the list of all MFCC frames of digit d (all training recordings pooled).
train = np.empty(10, dtype=object)
for number in range(10):
    frames = []
    for url in _digit_files(number)[:-N_TEST_FILES]:
        frames.extend(_mfcc_frames(url))
    # The digits have different frame counts, so the data is ragged: store one
    # Python list per slot of an object array instead of letting NumPy guess.
    train[number] = frames

np.save('train.npy', train)


# generate the dataset of test and save it
# every row of test is [frames of one recording, digit label]
data = []
for number in range(10):
    for url in _digit_files(number)[-N_TEST_FILES:]:
        data.append([_mfcc_frames(url), number])

test = np.empty((len(data), 2), dtype=object)
for row, (frames, number) in enumerate(data):
    test[row, 0] = frames
    test[row, 1] = number

np.save('test.npy', test)

In [27]:
# load the train and test data sets
import numpy as np

# Both files hold object arrays (ragged per-digit frame lists and
# [frames, label] pairs).  NumPy stores those with pickle, and np.load only
# reads pickled data when allow_pickle=True is passed.
# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
train = np.load('train.npy', allow_pickle=True)
train = train.tolist()

test = np.load('test.npy', allow_pickle=True)
test = test.tolist()

## GMM算法实现

分别训练10个数字的GMM模型，下面以Zero为例

In [8]:
import scipy as sc
from scipy import random, linalg, stats, special

In [9]:
# Training frames of digit zero; zeros.shape is (488, 12),
# i.e. 488 frames with 12 MFCC coefficients each.
zeros = train[0]
# y is the observation matrix used by the EM code below (one frame per row).
y = np.array(zeros)

选取12维特征的MFCC

共488个样本

给定一个由8个高斯分量组成的混合模型

In [10]:
# parameters
# dimension of one observation (number of MFCC coefficients per frame)
NProperties = 12
# number of Gaussian components in the mixture
NClasses = 8
# number of observations (rows of y)
NObjects = y.shape[0]

随机生成每一类的均值

以整体方差为基础，随机生成每一类的协方差

每一类的概率初始相等

In [11]:
# function that generates symmetric positive definite matrices
def PosSymDefMatrix(n,sd):
    """Return a random symmetric positive definite ``n`` x ``n`` matrix.

    The matrix is ``A @ A.T / n + sd * I`` where ``A`` holds uniform draws
    from ``[0, 1)``.  ``A @ A.T`` is symmetric positive semi-definite by
    construction, so every eigenvalue of the result is at least ``sd``; the
    result is therefore positive definite for any ``sd > 0``.  (Symmetrising
    ``A`` as ``(A + A.T) / 2`` does not give that guarantee: such a matrix is
    generally indefinite unless ``sd`` is large compared to ``n``.)

    Parameters
    ----------
    n : int
        Size of the matrix.
    sd : float
        Value added to the diagonal; must be positive for the positive
        definiteness guarantee to hold.

    Returns
    -------
    numpy.matrix
        The random matrix (``numpy.matrix`` is kept for compatibility with
        existing callers).
    """
    A = np.random.rand(n, n)
    # Dividing by n keeps every entry in [0, 1), the range of the raw draws.
    M = np.matrix(A @ A.T / n)
    M = M + sd*np.eye(n)
    return M

# variance taken over all entries of y (no axis given), used as the base scale
sdDiff = np.var(y)
# one diagonal offset per class: the base variance plus a uniform draw from [0, 1);
# shape is (1, NClasses)
SDClass = np.random.rand(1,NClasses) + sdDiff

# one random NProperties x NProperties matrix per class, each built with its own offset
Cov = [PosSymDefMatrix(NProperties,i) for i in SDClass[0]]

In [12]:
# Initial parameters of the mixture.
# initMu  : (NClasses, NProperties)              one mean vector per class
# initCov : (NProperties, NProperties, NClasses) one covariance matrix per class
# initW   : (NClasses,)                          equal mixing weights
initMu = np.empty([NClasses, NProperties])
initCov = np.empty([NProperties, NProperties, NClasses])

# The average of the random matrices does not depend on the class: compute it once.
meanCov = np.mean(np.array(Cov), axis=0)

for j in range(NClasses):

    # random mean: per-feature uniform draw in [0, 1) scaled by the column maximum of y
    initMu[j,:] = np.random.random(NProperties)*np.max(y, axis=0)
    # np.random is used directly; the `random` name imported from scipy is only a
    # legacy alias of numpy.random.  The integer drawn from [0, 50) is added to
    # every entry of the matrix.
    initCov[:,:,j] = meanCov + np.random.randint(0,50)

initW = np.repeat(1.0/NClasses,NClasses)

EM算法

In [13]:
def EStep(y, w, mu, cov):
    """E step of EM for a Gaussian mixture: compute the responsibilities.

    Parameters
    ----------
    y : array, shape (N, d)
        Observations, one per row.
    w : sequence of length K
        Mixing weights.
    mu : array, shape (K, d)
        Component means.
    cov : array, shape (d, d, K)
        Component covariance matrices, stacked along the last axis.

    Returns
    -------
    r_ij : ndarray, shape (N, K)
        ``r_ij[i, j]`` is the posterior probability that observation ``i``
        was generated by component ``j``; every row sums to 1.
    """
    from scipy.special import logsumexp

    y = np.asarray(y, dtype=float)
    n_classes = mu.shape[0]

    # A zero weight legitimately maps to log(0) = -inf (zero responsibility).
    with np.errstate(divide='ignore'):
        log_w = np.log(np.asarray(w, dtype=float))

    # Work with log densities: for observations far from every component the
    # plain densities underflow to 0 and the normalisation becomes 0/0 = NaN.
    log_r = np.empty((y.shape[0], n_classes))
    for jClass in range(n_classes):
        # logpdf evaluates all observations at once for this component
        log_r[:, jClass] = log_w[jClass] + sc.stats.multivariate_normal.logpdf(
            y, mu[jClass, :], cov[:, :, jClass])

    # normalise each row in the log domain, then go back to probabilities
    log_r -= logsumexp(log_r, axis=1, keepdims=True)

    return np.exp(log_r)

In [14]:
def MStep(r, y, mu, cov):
    """M step of EM for a Gaussian mixture: re-estimate weights, means, covariances.

    Parameters
    ----------
    r : array, shape (N, K)
        Responsibilities from the E step.
    y : array, shape (N, d)
        Observations, one per row.
    mu : array, shape (K, d)
        Previous means.  Not read: the update depends only on ``r`` and ``y``;
        the parameter is kept so existing call sites keep working.
    cov : array, shape (d, d, K)
        Previous covariances.  Not read, kept for the same reason.

    Returns
    -------
    w_j : ndarray, shape (K,)
        Updated mixing weights.
    mu_j : ndarray, shape (K, d)
        Updated means.
    cov_j : ndarray, shape (d, d, K)
        Updated covariance matrices, stacked along the last axis.
    """
    r = np.asarray(r, dtype=float)
    y = np.asarray(y, dtype=float)

    N, n_features = y.shape
    n_classes = r.shape[1]

    # total responsibility collected by every component
    r_sum = np.sum(r, axis=0)

    # the weights
    w_j = r_sum / N

    # mean: responsibility-weighted average of the observations.
    # r.T @ y sums r[i, j] * y[i, :] over i without building one outer
    # product per observation.
    mu_j = (r.T @ y) / r_sum[:, np.newaxis]

    # covariance: responsibility-weighted scatter around the new mean,
    # accumulated per component so no (N, d, d, K) temporary is needed
    cov_j = np.zeros((n_features, n_features, n_classes))
    for j in range(n_classes):
        centered = y - mu_j[j, :]
        weighted = r[:, j, np.newaxis] * centered
        cov_j[:, :, j] = (weighted.T @ centered) / r_sum[j]

    return w_j,mu_j,cov_j

随机初始化两次

每次EM算法迭代10次

In [126]:
from scipy.special import logsumexp


def _log_likelihood(y, w, mu, cov):
    """Return the total log-likelihood of the rows of y under the mixture (w, mu, cov)."""
    w = np.asarray(w, dtype=float)
    mu = np.asarray(mu)
    cov = np.asarray(cov)
    # a zero weight legitimately maps to log(0) = -inf
    with np.errstate(divide='ignore'):
        log_w = np.log(w)
    # Sum the components in the log domain: log(sum(w * pdf)) underflows to
    # log(0) = -inf for observations that are far from every component.
    log_terms = np.empty((y.shape[0], w.shape[0]))
    for jClass in range(w.shape[0]):
        log_terms[:, jClass] = log_w[jClass] + sc.stats.multivariate_normal.logpdf(
            y, mu[jClass, :], cov[:, :, jClass])
    return np.sum(logsumexp(log_terms, axis=1))


Inititeration = 2   # number of random restarts
EMiteration = 10    # EM iterations per restart
lookLH = 20         # the log-likelihood is printed when i > EMiteration - lookLH

# best log-likelihood over all restarts so far
logLH = -np.inf

for init in range(Inititeration):

    # starting values: fresh random means, shared initW / initCov
    initMu = np.empty([NClasses, NProperties])
    for j in range(NClasses):
        initMu[j,:] = np.random.random(NProperties)*np.amax(y, axis=0)

    r_n = EStep(y, initW, initMu, initCov)
    w_n,mu_n,cov_n = MStep(r_n, y, initMu, initCov)

    for i in range(EMiteration):

        # E step
        r_n = EStep(y, w_n, mu_n, cov_n)

        # M step
        w_n,mu_n,cov_n = MStep(r_n, y, mu_n, cov_n)

        # compute log likelihood of the data under the updated parameters
        logL = _log_likelihood(y, w_n, mu_n, cov_n)

        if i > EMiteration - lookLH:
            print (logL)

    # keep the parameters of the restart that ended with the highest log-likelihood
    if logL > logLH:
        logLH = logL
        print ('found larger: ', logLH)
        w_p = w_n
        mu_p = mu_n
        cov_p = cov_n
        r_p = r_n

-26018.722672266704
-25988.067450737264
-25967.84274320457
-25950.43195704537
-25934.23048802923
-25919.528401798194
-25906.936121658273
-25896.031548897896
-25885.520788794718
-25875.21358385885
found larger:  -25875.21358385885
-26009.1528924968
-25987.094792163312
-25973.5626658798
-25960.140627496286
-25944.605285913916
-25928.42089791997
-25913.12129731738
-25897.65971066233
-25882.23161305108
-25870.30442305726
found larger:  -25870.30442305726


给出估计结果

In [133]:
# Report the parameters of the best restart (w_p, mu_p, cov_p).
print ('The inferred mixing proportions: ',w_p)
print ('------------------')

# one mean vector per mixture component
for i in range(0,NClasses):
    print (['The Inferred means'], mu_p[i,:])
print ('------------------')

# one covariance matrix per mixture component; the fitted covariances are
# stored in cov_p
for i in range(0,NClasses):
    print (['the Inferred variances' ])
    print (cov_p[:,:,i])

The inferred mixing proportions:  [0.14069457 0.10303517 0.0319473  0.05942175 0.39097681 0.14426707
 0.07685301 0.05280431]
------------------
['The Inferred means'] [-349.5880331   168.09557035  -33.64956422  -44.30324643  -44.45115654
   -6.79656828  -11.04387043  -28.14832335  -20.36969452   -0.63010176
   -5.35554765   -7.58067372]
['The Inferred means'] [-462.985099    110.36588737   23.6452472    15.65460076   12.73252117
    6.83680257   -2.8065137     6.12406684   -2.80950251   -1.84943512
  -10.53833116   -1.16473769]
['The Inferred means'] [-142.6666634   113.00801032  -39.05947845   33.46882647  -26.40042595
  -12.98691878  -33.13271991  -25.35947012   -4.93063708  -24.06571119
  -11.89603221   -9.17142071]
['The Inferred means'] [-371.76377428  127.22155505   -4.23340353    2.13476453   13.39603847
    0.57989868  -27.39374542   11.17494498   -9.34112353   -2.28911923
  -16.32636714    4.63711929]
['The Inferred means'] [-5.36157506e+02  6.16775757e+01  9.29167235e+00  1.4

Todo

* 训练出十个数字的模型后测试（计算测试样本在每个模型中的logLH后，最大的对应的数字即为估计的结果）

* 利用二分法挑选k，而不是使用给定的值

* 修改结束条件，使其不再是固定的迭代轮数（例如改为收敛判据）

## 为每个数字训练GMM模型

In [21]:
from scipy.special import logsumexp


def _log_likelihood(y, w, mu, cov):
    """Return the total log-likelihood of the rows of y under the mixture (w, mu, cov)."""
    w = np.asarray(w, dtype=float)
    mu = np.asarray(mu)
    cov = np.asarray(cov)
    # a zero weight legitimately maps to log(0) = -inf
    with np.errstate(divide='ignore'):
        log_w = np.log(w)
    # Sum the components in the log domain: log(sum(w * pdf)) underflows to
    # log(0) = -inf for observations that are far from every component.
    log_terms = np.empty((y.shape[0], w.shape[0]))
    for jClass in range(w.shape[0]):
        log_terms[:, jClass] = log_w[jClass] + sc.stats.multivariate_normal.logpdf(
            y, mu[jClass, :], cov[:, :, jClass])
    return np.sum(logsumexp(log_terms, axis=1))


# parameters
NProperties = 12
NClasses = 8
# models[d] = [weights, means, covariances, responsibilities] of digit d
models = []

for num in range(len(train)):
    number = train[num]
    y = np.array(number)
    NObjects = y.shape[0]

    Inititeration = 2   # number of random restarts per digit
    EMiteration = 10    # EM iterations per restart

    # best log-likelihood over the restarts of this digit
    logLH = -np.inf

    for init in range(Inititeration):

        # starting values: fresh random means; initW and initCov are the
        # module-level initial weights / covariances and are shared by all digits
        initMu = np.empty([NClasses, NProperties])
        for j in range(NClasses):
            initMu[j,:] = np.random.random(NProperties)*np.amax(y, axis=0)

        r_n = EStep(y, initW, initMu, initCov)
        w_n,mu_n,cov_n = MStep(r_n, y, initMu, initCov)

        for i in range(EMiteration):

            # E step
            r_n = EStep(y, w_n, mu_n, cov_n)

            # M step
            w_n,mu_n,cov_n = MStep(r_n, y, mu_n, cov_n)

            # compute log likelihood of the data under the updated parameters
            logL = _log_likelihood(y, w_n, mu_n, cov_n)

        # keep the parameters of the restart that ended with the highest log-likelihood
        if logL > logLH:
            logLH = logL
            w_p = w_n
            mu_p = mu_n
            cov_p = cov_n
            r_p = r_n

    # Without a finite log-likelihood no restart was accepted and w_p, mu_p, ...
    # would still hold the previous digit's model; fail loudly instead.
    if not np.isfinite(logLH):
        raise RuntimeError("EM did not produce a finite log-likelihood for digit %d" % num)

    print("%d done." % num)

    models.append([w_p, mu_p, cov_p, r_p])

0 done.
1 done.
2 done.
3 done.
4 done.
5 done.
6 done.
7 done.
8 done.
9 done.


保存训练好的模型

In [22]:
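# save the model (left commented out; uncomment to write the freshly trained
# models to demo.npy, the file that the next section loads)
# m = np.array(models)
# np.save('demo.npy',m)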

## 在测试集上预测

载入训练好的模型

In [23]:
# load the model
# The saved models are nested Python/NumPy objects, stored by NumPy with pickle;
# np.load only reads pickled data when allow_pickle=True is passed.
# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
m = np.load('demo.npy', allow_pickle=True)
models = m.tolist()

In [29]:
from scipy.special import logsumexp


def _log_likelihood(y, w, mu, cov):
    """Return the total log-likelihood of the rows of y under the mixture (w, mu, cov)."""
    w = np.asarray(w, dtype=float)
    mu = np.asarray(mu)
    cov = np.asarray(cov)
    # a zero weight legitimately maps to log(0) = -inf
    with np.errstate(divide='ignore'):
        log_w = np.log(w)
    # Sum the components in the log domain: log(sum(w * pdf)) underflows to
    # log(0) = -inf for observations that are far from every component.
    # The number of components is taken from the model itself.
    log_terms = np.empty((y.shape[0], w.shape[0]))
    for jClass in range(w.shape[0]):
        log_terms[:, jClass] = log_w[jClass] + sc.stats.multivariate_normal.logpdf(
            y, mu[jClass, :], cov[:, :, jClass])
    return np.sum(logsumexp(log_terms, axis=1))


right = 0       # number of correctly classified samples
predict = []    # [predicted digit, true digit] for every test sample

for sample in test:

    # sample[0]: frames of one recording, sample[1]: its digit label
    y = np.array(sample[0])

    # Reset the best score for every sample; res = -1 means "no model produced
    # a comparable score", so a previous sample's prediction is never reused.
    logLH = -np.inf
    res = -1

    for i, model in enumerate(models):

        w_n = model[0]
        mu_n = model[1]
        cov_n = model[2]

        # log likelihood of the whole recording under the model of digit i
        logL = _log_likelihood(y, w_n, mu_n, cov_n)

        # strict '>' keeps the first model in case of a tie
        if logL > logLH:
            logLH = logL
            res = i

    if res == sample[1]:
        right += 1

    predict.append([res, sample[1]])

    print("another sample is predicted.")


print(right/len(test))

another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
another sample is predicted.
0.5


In [31]:
# Header for the listing below: "predicted value, true value".
print("预测值 真实值")
# Bare expression: the notebook displays the [predicted, true] pairs collected in `predict`.
predict

预测值 真实值


[[0, 0],
 [0, 0],
 [0, 1],
 [7, 1],
 [0, 2],
 [2, 2],
 [0, 3],
 [3, 3],
 [4, 4],
 [4, 4],
 [5, 5],
 [3, 5],
 [6, 6],
 [6, 6],
 [7, 7],
 [3, 7],
 [0, 8],
 [0, 8],
 [5, 9],
 [3, 9]]

正确率达到50%