In [1]:
import os
import random
from io import open
import unicodedata
import string
import re

import torch
import torchaudio
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from pathlib import Path
import kaldi_io
import math
import torch.utils.data as Data

from lib.Data_show import Data_show
from lib.Phone_cla_Dataset import Phone_cla_Dataset
from lib.Decoder import Decoder

%matplotlib inline


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]="3"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## 加载数据

In [3]:
phone_label = { u:d for u,d in kaldi_io.read_vec_int_ark("feats/ali_chain.1.ph") }
feats = { u:d for u,d in kaldi_io.read_mat_scp("../wake_up_align_sil_shared_recorp/train_fbank/feats.scp") }

In [4]:
print(len(feats))
print(len(phone_label))

96489
96489


## 自定义数据集

In [5]:
Data_show().phone2class

{1: 0, 3: 0, 129: 1, 63: 2, 61: 3, 27: 4, 128: 5, 64: 6, 92: 7, 69: 8}

In [6]:
utt="SV0287_6_15_N3046"
a=np.zeros(feats[utt].shape[0], int)
for i in range(a.shape[0]):
    print("i:\t", i, "\t",(i+1)//3)
    a[i]=phone_label[utt][(i)//3]

i:	 0 	 0
i:	 1 	 0
i:	 2 	 1
i:	 3 	 1
i:	 4 	 1
i:	 5 	 2
i:	 6 	 2
i:	 7 	 2
i:	 8 	 3
i:	 9 	 3
i:	 10 	 3
i:	 11 	 4
i:	 12 	 4
i:	 13 	 4
i:	 14 	 5
i:	 15 	 5
i:	 16 	 5
i:	 17 	 6
i:	 18 	 6
i:	 19 	 6
i:	 20 	 7
i:	 21 	 7
i:	 22 	 7
i:	 23 	 8
i:	 24 	 8
i:	 25 	 8
i:	 26 	 9
i:	 27 	 9
i:	 28 	 9
i:	 29 	 10
i:	 30 	 10
i:	 31 	 10
i:	 32 	 11
i:	 33 	 11
i:	 34 	 11
i:	 35 	 12
i:	 36 	 12
i:	 37 	 12
i:	 38 	 13
i:	 39 	 13
i:	 40 	 13
i:	 41 	 14
i:	 42 	 14
i:	 43 	 14
i:	 44 	 15
i:	 45 	 15
i:	 46 	 15
i:	 47 	 16
i:	 48 	 16
i:	 49 	 16
i:	 50 	 17
i:	 51 	 17
i:	 52 	 17
i:	 53 	 18
i:	 54 	 18
i:	 55 	 18
i:	 56 	 19
i:	 57 	 19
i:	 58 	 19
i:	 59 	 20
i:	 60 	 20
i:	 61 	 20
i:	 62 	 21
i:	 63 	 21
i:	 64 	 21
i:	 65 	 22
i:	 66 	 22
i:	 67 	 22
i:	 68 	 23
i:	 69 	 23
i:	 70 	 23
i:	 71 	 24
i:	 72 	 24
i:	 73 	 24
i:	 74 	 25
i:	 75 	 25
i:	 76 	 25
i:	 77 	 26
i:	 78 	 26
i:	 79 	 26
i:	 80 	 27
i:	 81 	 27
i:	 82 	 27
i:	 83 	 28
i:	 84 	 28
i:	 85 	 28
i:	 86 

In [7]:
data_set_train = Phone_cla_Dataset(phone_label, feats)

In [8]:
Phone_cla_Dataset.class_trans_vector(phone_label["SV0287_6_15_N3046"]) #Phone_cla_Dataset初始化之后可使用

array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 8,
       8, 8, 0, 0])

In [9]:
data_set_train.phone_label_nd[20:51]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9])

In [10]:
print(data_set_train.feats_nd.shape)
print(data_set_train.phone_label_nd.shape)
print(data_set_train.phone_label_nd[14440478])

(23377008, 40)
(23377008,)
9


In [None]:
train_data = torch.Tensor(data_set_train.feats_nd).to(device)
train_label = torch.LongTensor(data_set_train.phone_label_nd).to(device)
# test_data = torch.Tensor(data_set_dev.feats_nd).to(device)
# test_label = torch.LongTensor(data_set_dev.phone_label_nd).to(device)

# 模型搭建

In [None]:
class DNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, 128)
        self.fc5 = nn.Linear(128, num_classes)


    def forward(self, input):
        x = torch.sigmoid(self.fc1(input))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        x = self.fc5(x)
        
        return x

## 训练迭代

In [None]:
LEARNING_RATE = 0.001  #0.001
EPOCH = 10        #400 best
BATCH_SIZE = 150
input_size=40
num_classes=10

In [None]:
print(train_data.shape)
print(train_label.shape)

In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# prepare the data loader
training_set = Data.TensorDataset(train_data,
                                  train_label)
training_loader = Data.DataLoader(dataset=training_set,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
# testing_set = Data.TensorDataset(test_data,
#                                  test_label)
# testing_loader = Data.DataLoader(dataset=testing_set,
#                                      batch_size=BATCH_SIZE,
#                                      shuffle=False)
model = DNN(input_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
for epoch in range(EPOCH):
    correct_train = 0
    total_train = 0
    
    total_loss = 0
    ite = 0
    print_every = 2000
    for (data, label) in training_loader:
        ite +=1
        data = data
        label = label
        pred_label = model(data)
        loss = criterion(pred_label, label)
        optim.zero_grad()
        loss.backward()
        total_loss += loss.item()
        optim.step()
        _, answer = torch.max(pred_label.data, 1)
        total_train += label.size(0)
        correct_train += (answer == label).sum()
        if ite % print_every == 0:
            print("total_loss:",total_loss/print_every, "\tloss:",loss.item())
            total_loss=0
    print('Epoch {:3d} Accuracy on training data: {}% ({}/{})'
          .format(epoch, (100 * correct_train / total_train), correct_train, total_train))
    # pytorch 0.4 feature, not calculate grad on test set
#     with torch.no_grad():
#         correct_test = 0
#         total_test = 0
#         for (data, label) in testing_loader:
#             pred_label = model(data)
#             _, answer = torch.max(pred_label.data, 1)
#             total_test += label.size(0)
#             correct_test += (answer == label).sum()
#         print('          Accuracy on testing data: {}% ({}/{})'
#               .format((100 * correct_test / total_test), correct_test, total_test))


### 预测

In [None]:
model = torch.load('model.pkl')
model.to(device)

In [None]:
# utt='SV0255_2_00_F0021'
# utt="SV0255_2_07_S1082"
utt=list(phone_label.keys())[110]

# label_list = list(phone_label_dev[utt])
pred_label = model(torch.Tensor(feats[utt]).to(device))
_, answer = torch.max(pred_label.data, 1)
answer_list=list(answer.to("cpu", torch.int).numpy())

In [None]:
utt="SV0287_6_12_S2850"

# label_list = list(phone_label_dev[utt])
pred_label = model(torch.Tensor(feats[utt]).to(device))
_, answer = torch.max(pred_label.data, 1)
answer_list=list(answer.to("cpu", torch.int).numpy())

## 将预测结果映射

In [21]:
decoder = Decoder(Data_show.phone2class)

In [27]:
title1, content1 = decoder.show_result(decoder.decode(torch.nn.Softmax()(pred_label)))

  """Entry point for launching an IPython kernel.


In [28]:
title, content = Data_show().show_softmax(torch.nn.Softmax()(pred_label))

  """Entry point for launching an IPython kernel.


In [31]:
print(title, content)

	sil	nsn	n	i2	h	ao3	m	i3	ii	ia3	other
 0:	0.97	0.00	0.00	0.00	0.01	0.01	0.00	0.00	0.01	0.00	
1:	0.96	0.00	0.00	0.00	0.01	0.02	0.00	0.00	0.00	0.00	
2:	0.99	0.00	0.00	0.00	0.01	0.01	0.00	0.00	0.00	0.00	
3:	0.99	0.00	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.00	
4:	0.99	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	
5:	0.98	0.00	0.00	0.00	0.00	0.01	0.00	0.00	0.01	0.00	
6:	0.93	0.00	0.00	0.00	0.00	0.01	0.00	0.00	0.06	0.00	
7:	0.98	0.00	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.00	
8:	0.93	0.00	0.00	0.00	0.03	0.03	0.00	0.00	0.01	0.00	
9:	0.98	0.00	0.00	0.00	0.01	0.01	0.00	0.00	0.00	0.00	
10:	0.84	0.00	0.00	0.00	0.15	0.01	0.00	0.00	0.00	0.00	
11:	0.96	0.00	0.00	0.00	0.03	0.01	0.00	0.00	0.00	0.00	
12:	0.97	0.00	0.00	0.00	0.02	0.01	0.00	0.00	0.00	0.00	
13:	0.93	0.00	0.00	0.00	0.06	0.01	0.00	0.00	0.00	0.00	
14:	0.98	0.00	0.00	0.00	0.01	0.01	0.00	0.00	0.00	0.00	
15:	0.91	0.00	0.00	0.00	0.04	0.02	0.00	0.00	0.03	0.00	
16:	0.96	0.00	0.00	0.00	0.02	0.01	0.00	0.00	0.01	0.00	
17:	0.98	0.00	0.00	0.00	0.01	0.01	0.

In [30]:
Phone_cla_Dataset.class_trans_vector(phone_label[utt])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4,
       4, 4, 4, 4, 4, 4, 0, 0, 5, 5, 5, 5, 5, 5, 6, 7, 7, 8, 8, 8, 8, 8,
       8, 8, 8, 0, 0, 0])

### “你好米雅”测试

In [32]:
feats_miya_test = { u:d for u,d in kaldi_io.read_mat_scp("/home1/meichaoyang/Dataset/feats/SLR85/far_field/train/feats.scp") }

In [39]:
utt_miya_test=list(feats_miya_test.keys())[421]
# utt_aishell="IC0001W0406"

pred_label_miya_test = model(torch.Tensor(feats_miya_test[utt_miya_test]).to(device))
_, answer_miya_test = torch.max(pred_label_miya_test.data, 1)
answer_miya_test_list=list(answer_miya_test.to("cpu", torch.int).numpy())

In [40]:
title, content = Data_show().show_softmax(torch.nn.Softmax()(pred_label_miya_test))

  """Entry point for launching an IPython kernel.


In [41]:
print(title, content)

	sil	nsn	n	i2	h	ao3	m	i3	ii	ia3	other
 0:	0.37	0.00	0.00	0.00	0.00	0.02	0.00	0.00	0.60	0.00	
1:	0.27	0.02	0.02	0.01	0.01	0.01	0.00	0.00	0.66	0.00	
2:	0.22	0.08	0.00	0.00	0.00	0.03	0.00	0.00	0.67	0.00	
3:	0.20	0.01	0.00	0.00	0.01	0.05	0.00	0.00	0.71	0.01	
4:	0.51	0.01	0.01	0.00	0.02	0.03	0.00	0.00	0.42	0.00	
5:	0.24	0.21	0.32	0.04	0.01	0.03	0.00	0.00	0.16	0.00	
6:	0.33	0.27	0.02	0.00	0.00	0.06	0.00	0.03	0.29	0.00	
7:	0.22	0.58	0.08	0.00	0.00	0.10	0.01	0.00	0.01	0.00	
8:	0.45	0.36	0.09	0.01	0.00	0.07	0.01	0.00	0.02	0.00	
9:	0.04	0.38	0.51	0.05	0.00	0.01	0.00	0.00	0.02	0.00	
10:	0.06	0.63	0.19	0.01	0.00	0.02	0.00	0.00	0.09	0.00	
11:	0.08	0.43	0.29	0.03	0.00	0.02	0.00	0.00	0.14	0.00	
12:	0.18	0.11	0.04	0.01	0.00	0.03	0.00	0.00	0.62	0.00	
13:	0.22	0.10	0.02	0.00	0.00	0.09	0.00	0.00	0.57	0.00	
14:	0.05	0.36	0.43	0.06	0.00	0.01	0.00	0.00	0.09	0.00	
15:	0.17	0.14	0.08	0.02	0.00	0.01	0.00	0.00	0.58	0.00	
16:	0.25	0.25	0.03	0.01	0.01	0.02	0.00	0.00	0.42	0.00	
17:	0.33	0.16	0.08	0.01	0.00	0.09	0.

In [38]:
utt_miya_test

'SV0002_2_08_S1171'

## 非“你好米雅”测试

In [None]:
feats_aishell = { u:d for u,d in kaldi_io.read_mat_scp("../wake_dnn_miya_only/feats_aishell2_test/feats.scp") }

In [None]:
utt_aishell=list(feats_aishell.keys())[420]
utt_aishell="IC0001W0406"

pred_label_aishell = model(torch.Tensor(feats_aishell[utt_aishell]).to(device))
_, answer_aishell = torch.max(pred_label_aishell.data, 1)
answer_aishell_list=list(answer_aishell.to("cpu", torch.int).numpy())

In [None]:
decoder.show_result(decoder.decode(torch.nn.Softmax()(pred_label_aishell)))

In [None]:
Data_show().show_softmax(torch.nn.Softmax()(pred_label_aishell))

In [None]:
utt_aishell

## 保存模型

In [None]:
torch.save(model.to("cpu"), 'model.pkl')
model1 = torch.load('model.pkl')

In [None]:
sm = torch.jit.script(model1)
sm.save("phone_cla_model.pt")

In [None]:
model1 = torch.load('model.pkl')

In [None]:
model1