In [1]:
import os
import random
from io import open
import unicodedata
import string
import re

import torch
import torchaudio
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from pathlib import Path
import kaldi_io
import math
import torch.utils.data as Data

from lib.Data_show import Data_show
from lib.Phone_cla_Dataset import Phone_cla_Dataset
from lib.Decoder import Decoder
from collections import Counter

%matplotlib inline


In [2]:
# Restrict this process to GPU index 3 via CUDA_VISIBLE_DEVICES, then use the first
# visible CUDA device when CUDA is available and the CPU otherwise.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

## 加载数据SLR85训练集

In [4]:
# Read the SLR85 hifi_16k train and dev feature tables into dicts.
# dict() consumes the (key, matrix) pairs yielded by the reader directly.
SLR_feats = dict(kaldi_io.read_mat_scp("/home1/meichaoyang/Dataset/feats/SLR85/hifi_16k/train/feats.scp"))
SLR_feats_dev = dict(kaldi_io.read_mat_scp("/home1/meichaoyang/Dataset/feats/SLR85/hifi_16k/dev/feats.scp"))

In [5]:
# Read the integer-vector archive into a dict keyed by the archive's entry keys.
phone_label = dict(kaldi_io.read_vec_int_ark("feats/ali.1.ph"))

In [None]:
# Load the head / mid / tail feature tables into three separate dicts.
feats_head_300k = dict(kaldi_io.read_mat_scp("../../wake_up_align_44_1k/train_fbank/feats_head_300000.scp"))
feats_mid_300k = dict(kaldi_io.read_mat_scp("../../wake_up_align_44_1k/train_fbank/feats_mid_300000.scp"))
feats_tail_300k = dict(kaldi_io.read_mat_scp("../../wake_up_align_44_1k/train_fbank/feats_tail_300000.scp"))

In [7]:
# Load the phone_align_more_44k training feature table into a dict.
feats = dict(kaldi_io.read_mat_scp("../../git/phone_align_more_44k/train_fbank/feats.scp"))

In [41]:
# Keep a random ~30% of the entries of `feats` whose key does not contain "SV",
# then merge in every entry of `SLR_feats` to form the training feature dict.
#
# A dedicated generator with a fixed seed is used so that re-running this cell (or
# restarting the kernel) always selects the same subset; the global `random`
# state is left untouched.
SUBSAMPLE_SEED = 0
SUBSAMPLE_KEEP_PROB = 0.3
_subsample_rng = random.Random(SUBSAMPLE_SEED)

feats_new = {
    key: values
    for key, values in feats.items()
    # The random draw only happens for keys that pass the "SV" filter.
    if "SV" not in key and _subsample_rng.random() < SUBSAMPLE_KEEP_PROB
}

# On duplicate keys the SLR_feats entry wins, because it is unpacked last.
feat_train = {**feats_new, **SLR_feats}

In [42]:
# Number of entries in the merged training dict (subsampled `feats` plus `SLR_feats`).
len(feat_train)

46921

In [43]:
# Number of entries of `feats` that survived the "SV" filter and the random subsampling.
len(feats_new)

26654

In [44]:
# Inspect the integer label vector stored for one key
# (presumably one phone id per feature frame — TODO confirm).
phone_label["SV0255_7_01_S3881"]

array([  1,   1, 128, 128, 128,  62,  62,  62,  60,  60,  60,  60,  60,
        26,  26,  26,  26,  26,  26,  26,  26,  26,  26,   1,   1,   1,
         1, 127, 127, 127,  63,  63,  63,  63,  91,  91,  68,  68,  68,
        68,  68,  68,   1,   1,   1,   1])

In [13]:
# Build a Data_show from a dict that maps the nine distinct ids seen in the label
# vector above to consecutive indices 0-8.
# NOTE(review): the instance is not assigned to a name, so only its repr is displayed;
# how Data_show uses this mapping is not visible here — confirm against lib.Data_show.
Data_show({1:0, 128:1, 62:2, 60:3, 26:4, 127:5, 63:6, 91:7, 68:8})

<lib.Data_show.Data_show at 0x7f0965029b38>

In [45]:
# Build the training dataset from the label dict and the merged training features.
train_dataset = Phone_cla_Dataset(phone_label, feat_train)

In [46]:
# Build the dev dataset from the same label dict and the SLR85 dev features.
dev_dataset = Phone_cla_Dataset(phone_label, SLR_feats_dev)

In [47]:
# Size of the dev dataset as reported by its __len__.
len(dev_dataset)

3360

In [48]:
# Shapes of the label and feature arrays exposed by the training dataset;
# the two are expected to agree on the first dimension.
print(train_dataset.phone_label_nd.shape)
print(train_dataset.feats_nd.shape)

(12391008,)
(12391008, 40)


In [49]:
# Convert the dataset arrays to tensors and move them to `device`:
# features via torch.Tensor, labels via torch.LongTensor (integer class ids).
_splits = (train_dataset, dev_dataset)
train_data, dev_data = (torch.Tensor(split.feats_nd).to(device) for split in _splits)
train_label, dev_label = (torch.LongTensor(split.phone_label_nd).to(device) for split in _splits)

In [50]:
# Count how often each label value occurs in the training labels (class balance check).
print('Counter(data)\n',Counter(train_dataset.phone_label_nd))

Counter(data)
 Counter({9: 6749608, 0: 2871882, 8: 644661, 4: 422730, 1: 375609, 3: 359376, 6: 345714, 7: 261774, 5: 224106, 2: 135548})


# 模型搭建

In [51]:
class DNN(nn.Module):
    """Fully connected classifier: four sigmoid hidden layers and a linear output layer.

    Args:
        input_size: Number of features in each input vector.
        num_classes: Number of scores produced for each input vector.
        hidden_size: Width of each of the four hidden layers. Defaults to 128,
            the width that was previously hard-coded, so existing
            ``DNN(input_size, num_classes)`` calls build the same network.
    """

    def __init__(self, input_size, num_classes, hidden_size=128):
        super().__init__()
        # Attribute names fc1..fc5 and their creation order are kept unchanged so
        # that saved models / state dicts and seeded initialisation still match.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, num_classes)

    def forward(self, input):
        """Return the raw output-layer scores for ``input``.

        No softmax is applied here: the last layer's linear output is returned
        as is, so callers apply softmax themselves when they need probabilities.

        Args:
            input: Tensor whose last dimension has ``input_size`` features.

        Returns:
            Tensor with the same leading dimensions and ``num_classes`` scores
            in the last dimension.
        """
        x = torch.sigmoid(self.fc1(input))
        x = torch.sigmoid(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return self.fc5(x)

## 训练迭代

In [78]:
# Training hyperparameters.
LEARNING_RATE = 1e-3  # author's note: 0.001
EPOCH = 10            # author's note: "400 best"
BATCH_SIZE = 150

# Model dimensions: 40 matches the second dimension of the training feature tensor,
# 10 matches the number of distinct label values counted above.
input_size, num_classes = 40, 10

In [79]:
# Sanity check: feature and label tensors must agree on the first dimension.
print(train_data.shape)
print(train_label.shape)

torch.Size([12391008, 40])
torch.Size([12391008])


In [None]:
# Wrap the tensors in datasets / loaders. Only the training loader is shuffled.
training_set = Data.TensorDataset(train_data, train_label)
training_loader = Data.DataLoader(dataset=training_set,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True)
testing_set = Data.TensorDataset(dev_data, dev_label)
testing_loader = Data.DataLoader(dataset=testing_set,
                                 batch_size=BATCH_SIZE,
                                 shuffle=False)

model = DNN(input_size, num_classes).to(device)

# Per-class loss weights, one entry per class id (index 0 -> class 0, ...).
class_weights = torch.tensor([3.5, 3.0, 3.0, 2.5, 2.5, 5, 2.0, 2.5, 1.5, 1.0]).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Named `optimizer` so the `optim` module imported at the top of the notebook
# is not overwritten by an optimizer instance.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Report the mean loss of the last `print_every` batches at this interval.
print_every = 2000

for epoch in range(EPOCH):
    # train()/eval() do not change the behaviour of the current all-Linear DNN,
    # but keep the loop correct if dropout / batch-norm layers are added later.
    model.train()
    correct_train = 0
    total_train = 0
    total_loss = 0
    for ite, (data, label) in enumerate(training_loader, start=1):
        pred_label = model(data)
        loss = criterion(pred_label, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        answer = pred_label.argmax(dim=1)
        total_train += label.size(0)
        # .item() keeps the counter a plain Python int, so the accuracy below is an
        # ordinary float instead of a device tensor (tensor repr in the log, and
        # integer division on old PyTorch versions).
        correct_train += (answer == label).sum().item()
        if ite % print_every == 0:
            print("total_loss:",total_loss/print_every, "\tloss:",loss.item())
            total_loss=0
    print('Epoch {:3d} Accuracy on training data: {}% ({}/{})'
          .format(epoch, (100 * correct_train / total_train), correct_train, total_train))

    # Evaluation on the dev set: gradients are not needed here.
    model.eval()
    with torch.no_grad():
        correct_test = 0
        total_test = 0
        for (data, label) in testing_loader:
            pred_label = model(data)
            answer = pred_label.argmax(dim=1)
            total_test += label.size(0)
            correct_test += (answer == label).sum().item()
        print('          Accuracy on testing data: {}% ({}/{})'
              .format((100 * correct_test / total_test), correct_test, total_test))



total_loss: 1.088012345403433 	loss: 0.9030971527099609
total_loss: 0.8672060132324696 	loss: 0.9089928269386292
total_loss: 0.829487093180418 	loss: 0.7300177812576294
total_loss: 0.7817974606305361 	loss: 0.9302864670753479
total_loss: 0.7577167164832354 	loss: 0.7709261775016785
total_loss: 0.7375916499942541 	loss: 0.7058442831039429
total_loss: 0.719573468491435 	loss: 0.5505875945091248
total_loss: 0.6957471581846476 	loss: 0.6818448901176453
total_loss: 0.6876599763631821 	loss: 0.5248100757598877
total_loss: 0.677106169745326 	loss: 0.641268253326416
total_loss: 0.6681067259162664 	loss: 0.6495600342750549
total_loss: 0.663715070053935 	loss: 0.7020452618598938
total_loss: 0.656351369008422 	loss: 0.7155068516731262
total_loss: 0.6569087934941054 	loss: 0.7084497809410095
total_loss: 0.6544713697433472 	loss: 0.579433023929596
total_loss: 0.6557699773907661 	loss: 0.6422834396362305
total_loss: 0.6490264546722174 	loss: 0.5124652981758118
total_loss: 0.6481849453598261 	loss: 0

### 预测

In [None]:
# model = torch.load('model.pkl')
# model.to(device)

In [59]:
# Load the test feature table into a dict and display its keys.
test_feats = dict(kaldi_io.read_mat_scp("/home1/meichaoyang/workspace/align_44_1k/test_feat/feats.scp"))
test_feats.keys()

dict_keys(['miya_mcy', 'miya_mcy1', 'miya_mingzhang', 'miya_mingzhang1'])

In [60]:
# For every position i in range(20, n_frames - 9), take the 30 rows feat[i-20 : i+10],
# flatten them into a single row, and stack all rows into one 2-D array.
feat = test_feats["miya_mcy1"]
feats_list = [
    feat[pos - 20:pos + 10].reshape(1, -1)
    for pos in range(20, feat.shape[0] - 9)
]
feats_nd = np.concatenate(tuple(feats_list))

In [61]:
# Single-window example: rows 10..39 of `feat` (i-20 up to, but excluding, i+10)
# flattened into one row.
i=30
input_feat = feat[i-20:i+10].reshape(1,-1)

In [63]:
# test_feats = { u:d for u,d in kaldi_io.read_mat_scp("/home1/meichaoyang/workspace/align_44_1k/test_feat/feats.scp")}

# Predict a class for every row of one test utterance.
# The input follows the model's current device, so the cell works whether the
# model is still on the training device or has been moved to the CPU for saving.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_label = model(torch.Tensor(test_feats["miya_mingzhang1"]).to(model_device))
answer = pred_label.argmax(dim=1)
answer_list=list(answer.to("cpu", torch.int).numpy())

In [448]:
# NOTE(review): the recorded output of this cell is a KeyError for this id, i.e. it
# was not a key of SLR_feats_dev when the cell was last run — pick an id that exists.
utt='SV0252_7_01_S3887'
# utt="SV0255_7_01_S3881"
# utt=list(phone_label.keys())[110]

# label_list = list(phone_label_dev[utt])
# The input follows the model's current device instead of assuming CPU or GPU.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_label = model(torch.Tensor(SLR_feats_dev[utt]).to(model_device))
answer = pred_label.argmax(dim=1)
answer_list=list(answer.to("cpu", torch.int).numpy())

KeyError: 'SV0252_7_01_S3887'

In [438]:
# NOTE(review): `feats_aishell` is loaded by a cell further down in this notebook,
# so this cell fails on a fresh top-to-bottom run unless that cell is executed first.
utt=list(feats_aishell.keys())[110]

# NOTE(review): despite its name, label_list holds the feature rows of `utt`
# (it is built from feats_aishell, not from a label dict) — confirm the intent.
label_list = list(feats_aishell[utt])

# The input follows the model's current device instead of assuming CPU or GPU.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_label = model(torch.Tensor(feats_aishell[utt]).to(model_device))
answer = pred_label.argmax(dim=1)
answer_list=list(answer.to("cpu", torch.int).numpy())

## 将预测结果映射

In [65]:
# Turn the model scores into per-row probabilities and format them for display.
# dim=1 (the class dimension of the 2-D score matrix) is given explicitly; relying on
# the implicit choice of torch.nn.Softmax() emits a deprecation warning.
title, content = Data_show().show_softmax(torch.softmax(pred_label, dim=1))

  """Entry point for launching an IPython kernel.


In [66]:
# Print the two values returned by show_softmax: the header line, then the table body.
print(title,"\n",content)

	sil	nsn	n	i2	h	ao3	m	i3	ii	ia3	other
 
 0:	0.92	0.07	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.00	
1:	0.92	0.07	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.00	
2:	0.92	0.06	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.02	
3:	0.92	0.06	0.00	0.00	0.00	0.01	0.00	0.00	0.01	0.01	
4:	0.91	0.06	0.00	0.00	0.00	0.01	0.00	0.00	0.01	0.00	
5:	0.86	0.04	0.00	0.00	0.01	0.01	0.00	0.00	0.00	0.08	
6:	0.84	0.01	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.15	
7:	0.84	0.05	0.00	0.00	0.00	0.01	0.00	0.00	0.05	0.04	
8:	0.93	0.05	0.00	0.00	0.00	0.01	0.00	0.00	0.01	0.00	
9:	0.90	0.05	0.00	0.00	0.00	0.01	0.00	0.00	0.02	0.02	
10:	0.92	0.05	0.00	0.00	0.00	0.01	0.00	0.00	0.02	0.00	
11:	0.91	0.07	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.01	
12:	0.90	0.03	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.05	
13:	0.91	0.08	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.00	
14:	0.92	0.04	0.00	0.00	0.01	0.01	0.00	0.00	0.00	0.02	
15:	0.89	0.01	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.08	
16:	0.91	0.06	0.00	0.00	0.00	0.01	0.00	0.00	0.00	0.02	
17:	0.91	0.02	0.00	0.00	0.00	0.01	

In [441]:
# NOTE(review): `word_label_new` is not defined anywhere in the visible notebook and the
# recorded output of this cell is a KeyError — confirm whether this cell is still needed.
word_label_new[utt]

KeyError: 'IC0500W0447'

### “你好米雅”测试

In [None]:
# Load the SLR85 far_field feature table into a dict.
feats_miya_test = dict(kaldi_io.read_mat_scp("/home1/meichaoyang/Dataset/feats/SLR85/far_field/train/feats.scp"))

In [None]:
# Pick the entry at position 420 of the far-field set and predict a class per row.
utt_miya_test=list(feats_miya_test.keys())[420]
# utt_aishell="IC0001W0406"

# The input follows the model's current device, so the cell works whether the
# model is still on the training device or has been moved to the CPU for saving.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_label_miya_test = model(torch.Tensor(feats_miya_test[utt_miya_test]).to(model_device))
answer_miya_test = pred_label_miya_test.argmax(dim=1)
answer_miya_test_list=list(answer_miya_test.to("cpu", torch.int).numpy())

In [None]:
# Per-row class probabilities for the far-field utterance, formatted for display.
# dim=1 (the class dimension of the 2-D score matrix) is given explicitly; relying on
# the implicit choice of torch.nn.Softmax() emits a deprecation warning.
title, content = Data_show().show_softmax(torch.softmax(pred_label_miya_test, dim=1))

In [None]:
# Print the header line and the table body returned by show_softmax.
print(title)
print(content)

In [None]:
# Look the same key up in `feats`.
# NOTE(review): utt_miya_test was taken from feats_miya_test (far_field set); confirm
# that this key is also expected to exist in `feats`.
feats[utt_miya_test]

### 非“你好米雅”测试

In [72]:
# Load the aishell2 test feature table into a dict.
feats_aishell = dict(kaldi_io.read_mat_scp("../../wake_dnn_miya_only/feats_aishell2_test/feats.scp"))

In [73]:
# Utterance to inspect. (The index-based pick that used to precede this line was
# overwritten immediately by this assignment and has been dropped.)
utt_aishell="IC0001W0406"

# The input follows the model's current device, so the cell works whether the
# model is still on the training device or has been moved to the CPU for saving.
model_device = next(model.parameters()).device
with torch.no_grad():  # inference only: no autograd graph is needed
    pred_label_aishell = model(torch.Tensor(feats_aishell[utt_aishell]).to(model_device))
answer_aishell = pred_label_aishell.argmax(dim=1)
answer_aishell_list=list(answer_aishell.to("cpu", torch.int).numpy())

In [74]:
# NOTE(review): `decoder` is never created in the visible notebook (the recorded output
# is a NameError). It presumably should be an instance of the imported `Decoder`;
# its constructor arguments are not visible here — create it before running this cell.
# dim=1 is passed explicitly to avoid the implicit-dimension softmax deprecation warning.
decoder.show_result(decoder.decode(torch.softmax(pred_label_aishell, dim=1)))

NameError: name 'decoder' is not defined

In [76]:
# Per-row class probabilities for the aishell utterance, formatted for display.
# dim=1 (the class dimension of the 2-D score matrix) is given explicitly; relying on
# the implicit choice of torch.nn.Softmax() emits a deprecation warning.
title1, content1 = Data_show().show_softmax(torch.softmax(pred_label_aishell, dim=1))

  """Entry point for launching an IPython kernel.


In [77]:
# Print the table body returned by show_softmax for the aishell utterance.
print(content1)

0:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
1:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
2:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
3:	0.96	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.02	
4:	0.94	0.00	0.00	0.00	0.00	0.00	0.00	0.02	0.00	0.03	
5:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.02	
6:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
7:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.02	
8:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
9:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
10:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
11:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
12:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.02	
13:	0.50	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.48	
14:	0.74	0.00	0.00	0.00	0.00	0.00	0.00	0.05	0.00	0.20	
15:	0.94	0.00	0.00	0.00	0.00	0.00	0.00	0.02	0.00	0.04	
16:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.02	
17:	0.97	0.00	0.00	0.00	0.00	0.00	0.00	0.01	0.00	0.02	
18:	0.96	0.00	0.00	0

## 保存模型

In [67]:
# nn.Module.to() moves the module itself, so `model.to("cpu")` would leave the live
# model on the CPU after saving. Remember where it was and move it back afterwards,
# so saving has no lasting side effect on `model`.
model_device = next(model.parameters()).device
torch.save(model.to("cpu"), 'model.pkl')
model.to(model_device)

# The whole module object is pickled (not just its state dict), so loading it
# requires the DNN class definition to be available.
model1 = torch.load('model.pkl')

  "type " + obj.__name__ + ". It won't be checked "


In [68]:
# Compile the reloaded model with torch.jit.script and save the scripted module.
sm = torch.jit.script(model1)
sm.save("phone_cla_model.pt")

In [None]:
# Reload the pickled model from disk.
# NOTE(review): this repeats the load already done in the save cell above.
model1 = torch.load('model.pkl')

In [None]:
# Display the module's repr.
model1

In [None]:
# Scratch dict for the merge-order check below; shares keys 2 and 3 with map2.
map1 = {
    1: "1-1",
    2: "2-1",
    3: "3-1",
}

In [None]:
# Scratch dict for the merge-order check below; shares keys 2 and 3 with map1.
map2 = {
    2: "2-2",
    3: "3-2",
    4: "4-2",
}

In [None]:
# Dict-unpacking merge: on duplicate keys (2 and 3) the later dict, map2, wins.
{**map1,**map2}

In [None]:
# Same merge in the opposite order: on duplicate keys (2 and 3) map1 now wins.
{**map2,**map1}