### 匯入相關 library

In [1]:
import jieba
import re
import heapq
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# jieba: use the big dictionary file, then layer the project's user dictionary on top.
jieba.set_dictionary('dict.txt.big')
jieba.load_userdict("userdict0722.txt")

# Padding: every encoded sentence is padded/truncated to max_length ids.
max_length = 20
padding_type = 'post'  # zeros are appended at the end
trunc_type = 'post'

# Tokenizer: restore the previously pickled Keras tokenizer.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load a
# tokenizer.pickle that was produced by this project.
with open('tokenizer.pickle', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

Building prefix dict from C:\ANDY\cathay_cip_留給國泰\dict.txt.big ...
Dumping model to file cache C:\Users\user\AppData\Local\Temp\jieba.u5d816dfa46dfe1e9a9efcb0a44246464.cache
Loading model cost 1.161 seconds.
Prefix dict has been built successfully.


In [2]:
# Category labels listed in class-id order: the position in this list is the numeric id.
_CATEGORY_LABELS = [
    '數位金融 - 數位存款帳戶',
    '數位金融 - 網路銀行',
    '數位金融 - 行動銀行',
    '數位金融 - LINE個人化服務',
    '數位金融 - 網路ATM',
    '行動支付 - Fitbit Pay',
    '行動支付 - Hami Pay',
    '基金投資',
    '貸款',
    '信託',
    'ATM相關服務',
    '信用卡',
    '綜合對帳單',
    'MyBill 輕鬆繳',
    '存款帳戶',
]

# Two-way lookup kept in a single dict: label -> id entries first, then id -> label entries.
label_index_dict = {label: class_id for class_id, label in enumerate(_CATEGORY_LABELS)}
label_index_dict.update(enumerate(_CATEGORY_LABELS))

## 驗證與測試

### 資料前處理 -- jieba 斷詞、去除標點符號、英文轉小寫

In [3]:
# Sample customer utterance used to walk through the pipeline one step at a time.
user_sentences = '我想要看一下我的信用卡繳款紀錄'

In [4]:
# Drop every character that is not a digit, an ASCII letter or a CJK ideograph, then lower-case.
reg = "[^0-9A-Za-z\u4e00-\u9fa5]"
user_sentences_without_punctuation = re.sub(reg, '', user_sentences).lower()
print(user_sentences_without_punctuation)

# Segment with jieba (cut_all=False); each token is prefixed with a space,
# so the joined string starts with a blank.
words = jieba.cut(user_sentences_without_punctuation, cut_all=False)
sentence_split = ''.join(' ' + word for word in words)
user_sentences_split = sentence_split

我想要看一下我的信用卡繳款紀錄


In [5]:
# Show the space-separated segmentation of the sample sentence.
print(user_sentences_split)

 我 想要 看 一下 我 的 信用卡 繳款 紀錄


### tokenize

In [6]:
# Inspect the word -> integer id vocabulary held by the loaded tokenizer.
tokenizer.word_index

{'<OOV>': 1,
 '如何': 2,
 '的': 3,
 '嗎': 4,
 '銀行': 5,
 '申請': 6,
 '使用': 7,
 '可以': 8,
 '我': 9,
 '信用卡': 10,
 '網路': 11,
 '什麼': 12,
 '有': 13,
 '是否': 14,
 'atm': 15,
 '交易': 16,
 '轉帳': 17,
 '或': 18,
 '在': 19,
 '需要': 20,
 '是': 21,
 '會': 22,
 '請問': 23,
 '服務': 24,
 '國泰': 25,
 '該': 26,
 '世華': 27,
 '帳戶': 28,
 '為': 29,
 '要': 30,
 '行動': 31,
 '後': 32,
 '辦理': 33,
 '金融卡': 34,
 '提款': 35,
 '基金': 36,
 '怎麼辦': 37,
 '哪些': 38,
 '金額': 39,
 '密碼': 40,
 '卡片': 41,
 '限制': 42,
 'app': 43,
 '帳單': 44,
 '繳費': 45,
 '設定': 46,
 '透過': 47,
 '呢': 48,
 '對帳單': 49,
 '上': 50,
 '之': 51,
 '了': 52,
 '為何': 53,
 '通知': 54,
 '計算': 55,
 '自動': 56,
 '與': 57,
 '可': 58,
 '查詢': 59,
 '時': 60,
 '提供': 61,
 'fitbitpay': 62,
 '功能': 63,
 '卡': 64,
 '費用': 65,
 '重新': 66,
 '外幣': 67,
 '消費': 68,
 'line': 69,
 '電子': 70,
 'hamipay': 71,
 '時間': 72,
 '多久': 73,
 '手續費': 74,
 '變更': 75,
 '綜合': 76,
 '錯誤': 77,
 '手機': 78,
 '及': 79,
 '人臉': 80,
 '國外': 81,
 '存款': 82,
 '完成': 83,
 'i刷': 84,
 '繳款': 85,
 '收到': 86,
 '已': 87,
 '無法': 88,
 '訊息': 89,
 '輸入': 90,
 '雙幣卡': 91,
 '額度'

In [7]:
# Turn the segmented sentence into word ids, then pad/truncate it to the configured length.
user_sequences = tokenizer.texts_to_sequences([user_sentences_split])
user_padded = pad_sequences(
    user_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

In [8]:
# Raw id sequence first, then the same sequence after padding to max_length.
print(user_sequences)
print(user_padded)

[[9, 1, 1, 681, 9, 3, 10, 85, 160]]
[[  9   1   1 681   9   3  10  85 160   0   0   0   0   0   0   0   0   0
    0   0]]


In [9]:
# print(testing_padded[0]) # 對應數位金融-行動銀行1.wav來檢查是否神經網路輸出正確

### NN

#### 找出可能的一個

In [10]:
# Load the trained NN model.
model = tf.keras.models.load_model('./models/nn_model.h5')

# Remains an empty string if no row produces a winning class.
y_test_predicted_nn = ''

sentences_probabilities = model.predict(user_padded)
print(sentences_probabilities)
for sentence_probabilities in sentences_probabilities:
    cur_max_sentence_prob = max(sentence_probabilities)
    # All class ids that hold the row maximum; the last one wins, and later rows override earlier ones.
    best_indices = [index for index, q in enumerate(sentence_probabilities) if q == cur_max_sentence_prob]
    if best_indices:
        y_test_predicted_nn = best_indices[-1]
print(label_index_dict[y_test_predicted_nn])

[[3.7467577e-14 1.0140836e-06 1.3547175e-18 7.5565518e-19 2.9184402e-10
  4.6273426e-18 1.1762869e-08 1.6780743e-11 9.9842763e-01 5.7950554e-05
  4.7070130e-18 1.4471620e-03 1.4429508e-14 8.2845464e-10 6.6124703e-05]]
貸款


### NN
#### 找出最可能的3個

In [11]:
# Rank (probability, class id) pairs of the first sentence and keep the three largest.
first_sentence_probabilities = sentences_probabilities[0]
sentences_top_three = heapq.nlargest(
    3,
    ((probability, class_index) for class_index, probability in enumerate(first_sentence_probabilities)),
)
print(sentences_top_three)
# Translate the winning class ids back to their category labels.
top_three_nn = [label_index_dict[class_index] for probability, class_index in sentences_top_three]
print(top_three_nn)

[(0.9984276, 8), (0.001447162, 11), (6.61247e-05, 14)]
['貸款', '信用卡', '存款帳戶']


### LSTM 

#### 找出可能的一個

In [12]:
# Load the trained LSTM model.
modelLSTM = tf.keras.models.load_model('./models/lstm_model.h5')

# Remains an empty string if no row produces a winning class.
y_test_predicted_lstm = ''

sentences_probabilities = modelLSTM.predict(user_padded)
print(sentences_probabilities)
for sentence_probabilities in sentences_probabilities:
    cur_max_sentence_prob = max(sentence_probabilities)
    # All class ids that hold the row maximum; the last one wins, and later rows override earlier ones.
    best_indices = [index for index, q in enumerate(sentence_probabilities) if q == cur_max_sentence_prob]
    if best_indices:
        y_test_predicted_lstm = best_indices[-1]
print(label_index_dict[y_test_predicted_lstm])

[[8.3477912e-04 2.9527089e-07 7.6854303e-02 7.5298536e-05 1.0590186e-06
  4.8947398e-04 7.3473775e-01 5.3143799e-07 1.6044482e-03 4.0543787e-06
  7.4633974e-08 1.8487394e-01 1.2461520e-04 3.8136375e-05 3.6133561e-04]]
行動支付 - Hami Pay


### LSTM
#### 找出最可能的3個

In [13]:
# Rank (probability, class id) pairs of the first sentence and keep the three largest.
first_sentence_probabilities = sentences_probabilities[0]
sentences_top_three = heapq.nlargest(
    3,
    ((probability, class_index) for class_index, probability in enumerate(first_sentence_probabilities)),
)
print(sentences_top_three)
# Translate the winning class ids back to their category labels.
top_three_lstm = [label_index_dict[class_index] for probability, class_index in sentences_top_three]
print(top_three_lstm)

[(0.73473775, 6), (0.18487394, 11), (0.0768543, 2)]
['行動支付 - Hami Pay', '信用卡', '數位金融 - 行動銀行']


### CNN

#### 找出可能的一個

In [14]:
# Load the trained CNN model.
modelCNN = tf.keras.models.load_model('./models/cnn_model.h5')

# Remains an empty string if no row produces a winning class.
y_test_predicted_cnn = ''

sentences_probabilities = modelCNN.predict(user_padded)
print(sentences_probabilities)
for sentence_probabilities in sentences_probabilities:
    cur_max_sentence_prob = max(sentence_probabilities)
    # All class ids that hold the row maximum; the last one wins, and later rows override earlier ones.
    best_indices = [index for index, q in enumerate(sentence_probabilities) if q == cur_max_sentence_prob]
    if best_indices:
        y_test_predicted_cnn = best_indices[-1]
print(label_index_dict[y_test_predicted_cnn])

[[1.6058648e-24 9.5040571e-18 6.7287132e-14 7.6552576e-15 1.4979563e-16
  5.5601856e-11 1.3153732e-07 2.1539246e-12 2.9552266e-24 1.0845410e-24
  2.7008307e-25 9.9999988e-01 1.8205493e-11 3.0902814e-10 1.5599606e-14]]
信用卡


### CNN
#### 找出最可能的3個

In [15]:
# Rank (probability, class id) pairs of the first sentence and keep the three largest.
first_sentence_probabilities = sentences_probabilities[0]
sentences_top_three = heapq.nlargest(
    3,
    ((probability, class_index) for class_index, probability in enumerate(first_sentence_probabilities)),
)
print(sentences_top_three)
# Translate the winning class ids back to their category labels.
top_three_cnn = [label_index_dict[class_index] for probability, class_index in sentences_top_three]
print(top_three_cnn)

[(0.9999999, 11), (1.3153732e-07, 6), (3.0902814e-10, 13)]
['信用卡', '行動支付 - Hami Pay', 'MyBill 輕鬆繳']


### 集成式學習(取nn、LSTM、CNN)三個神經網路共同決策出三個最可能的結果

In [16]:
# Interleave the three rankings rank by rank, always in the order LSTM, CNN, NN.
temp = [
    ranked_labels[rank]
    for rank in range(3)
    for ranked_labels in (top_three_lstm, top_three_cnn, top_three_nn)
]
# Remove repeats while keeping the first occurrence of each label, then keep the first three.
uniq = list(dict.fromkeys(temp))
top_three_ensemble = uniq[:3]

print(top_three_ensemble)

['行動支付 - Hami Pay', '信用卡', '貸款']


In [17]:
# tensorboard --logdir=./logs

## 驗證與測試(一次測試多個句子主要針對人性化問題統計正確率)

In [18]:
import pandas as pd
# Load the evaluation workbook; the columns used later are '問題描述(Input)' (question text)
# and '預設類別' (expected category).
user_texts_and_labels = pd.read_excel('語音目錄.xlsx')
# Preview the first rows.
user_texts_and_labels.head()

Unnamed: 0,檔名,問題描述(Input),是否有音檔,預設類別,相關程度(123低中高)1表示幾乎沒看過3表示類似問題已經被丟入訓練集只是問法或是贅字
0,1,你好，我的信用卡遺失了，想要辦掛失,,信用卡,3.0
1,2,我的信用卡丟在國外，想要立即辦理掛失,,信用卡,3.0
2,3,你好，我之後要出國旅遊，所以想要調高信用卡的額度,,信用卡,2.0
3,4,您好我有三倍卷綁定信用卡的問題想要請教,,信用卡,2.0
4,5,我想要透過信用卡捐款給慈濟，想問一下要怎麼捐,,信用卡,2.0


In [19]:
# Keep only the rows that actually contain a question; the same mask selects text and label.
has_question = user_texts_and_labels['問題描述(Input)'].notna()
user_text_list = user_texts_and_labels.loc[has_question, '問題描述(Input)']
user_text_labels = user_texts_and_labels.loc[has_question, '預設類別']

In [20]:
# Run every question through the full pipeline (clean -> segment -> encode -> three models
# -> ensemble) and collect one top-three ensemble prediction per question.
# Relies on `model`, `modelLSTM` and `modelCNN` having been loaded by the earlier cells.
top_three_ensemble_list = []
for user_text in user_text_list:
    user_sentences = user_text

    # Drop everything except digits, ASCII letters and CJK ideographs, then lower-case.
    reg = "[^0-9A-Za-z\u4e00-\u9fa5]"
    user_sentences_without_punctuation = re.sub(reg, '', user_sentences).lower()

    # Segment with jieba; each token is prefixed with a space, so the string starts with a blank.
    words = jieba.cut(user_sentences_without_punctuation, cut_all=False)
    user_sentences_split = ''.join(' ' + word for word in words)
    print(user_sentences_split)

    # Encode the sentence as word ids and pad/truncate it to max_length.
    user_sequences = tokenizer.texts_to_sequences([user_sentences_split])
    user_padded = pad_sequences(user_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

    # Three most probable labels from each network. Pairs are (probability, class id),
    # so heapq.nlargest ranks by probability first.
    top_three_by_model = []
    for classifier in (model, modelLSTM, modelCNN):
        sentences_probabilities = classifier.predict(user_padded)
        sentences_top_three = heapq.nlargest(
            3, zip(sentences_probabilities[0], range(len(sentences_probabilities[0]))))
        top_three_by_model.append(
            [label_index_dict[class_index] for _probability, class_index in sentences_top_three])
    top_three_nn, top_three_lstm, top_three_cnn = top_three_by_model

    # Ensemble: interleave the rankings rank by rank in the order LSTM, CNN, NN,
    # drop repeats (first occurrence wins) and keep the first three labels.
    temp = [
        ranked_labels[rank]
        for rank in range(3)
        for ranked_labels in (top_three_lstm, top_three_cnn, top_three_nn)
    ]
    top_three_ensemble = list(dict.fromkeys(temp))[:3]

    print(top_three_ensemble)
    top_three_ensemble_list.append(top_three_ensemble)

 你好 我 的 信用卡 遺失 了 想 要辦 掛失
['ATM相關服務', '數位金融 - 行動銀行', '存款帳戶']
 我 的 信用卡 丟在 國外 想要 立即 辦理 掛失
['信用卡', '貸款', '數位金融 - 網路銀行']
 你好 我 之後 要 出國 旅遊 所以 想要 調 高 信用卡 的 額度
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 您好 我 有 三倍 卷 綁定 信用卡 的 問題 想要 請教
['信用卡', '貸款', '數位金融 - 網路銀行']
 我 想要 透過 信用卡 捐款 給 慈濟 想問 一下 要 怎麼 捐
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 您好 我 想要 更改 信用卡 帳單 寄送 地址
['信用卡', '貸款', '綜合對帳單']
 我 想要 問 一下 信用卡 機場 接送 的 問題
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 您好 我 想要 查詢 本期 信用卡 帳單 金額
['信用卡', '貸款', '基金投資']
 我 想要 看 一下 我 的 信用卡 繳款 紀錄
['行動支付 - Hami Pay', '信用卡', '貸款']
 我 想要 查 一下 我 這個 月 的 信用卡 可用 額度
['信用卡', '貸款', '存款帳戶']
 我 沒有 收到 6 月 的 信用卡 帳單
['信用卡', '存款帳戶', '基金投資']
 我 昨天 刷 了 一筆 消費 但是 我 想要 取消 這筆 交易
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 我 上 禮拜 辦 了 一張 信用卡 但是 卡片 一直 沒有 寄來 我想問 一下 目前 的 處理 進度
['存款帳戶', '信用卡', '貸款']
 我 想要 預約 機場 接送
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 我 原本 是 學生 卡 現在 已經 畢業 了 所以 想要 申請 調 高 額度
['信用卡', '存款帳戶', '數位金融 - 網路銀行']
 我 想要 使用 機場 貴賓室 請問 有 甚麼 卡有 這項 服務 的 嗎
['信用卡', '數位金融 - 網路銀行', '數位金融 - 行動銀行']
 我 想要 問有 關於 蝦皮 聯名卡 的 問題
['數位金融 - 行動銀行', '信用卡', '存款帳戶']

In [21]:
# A question counts as correct when its expected label appears among the three ensemble labels.
hits = [
    user_text_label in top_three_ensemble
    for user_text_label, top_three_ensemble in zip(user_text_labels, top_three_ensemble_list)
]
total = len(hits)
correct = sum(hits)
print('Accuracy rate: %.2f %%' % (correct * 100 / total,))

Accuracy rate: 91.94 %


In [22]:
# Number of evaluated questions (rows with a non-empty question text).
len(user_text_list)

62

In [23]:
# Assemble question text, predicted top-three labels and expected label into one table.
result_columns = ['問題描述(Input)', '預測分類', '正確分類']
data = dict(zip(result_columns, (user_text_list, top_three_ensemble_list, user_text_labels)))
user_text_predicted_df = pd.DataFrame(data, columns=result_columns)

In [24]:
# Use the fully qualified option keys: the abbreviated patterns 'max_columns' / 'max_row'
# also match the styler.render.* options on pandas >= 1.4 and then raise OptionError.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 300)

def _hit_marker(row):
    """Return 'o' when the row's expected label is among its predicted labels, otherwise 'X'."""
    if row['正確分類'] in row['預測分類']:
        return 'o'
    return 'X'


user_text_predicted_df['是否正確分類'] = user_text_predicted_df.apply(_hit_marker, axis=1)
user_text_predicted_df

Unnamed: 0,問題描述(Input),預測分類,正確分類,是否正確分類
0,你好，我的信用卡遺失了，想要辦掛失,"[ATM相關服務, 數位金融 - 行動銀行, 存款帳戶]",信用卡,X
1,我的信用卡丟在國外，想要立即辦理掛失,"[信用卡, 貸款, 數位金融 - 網路銀行]",信用卡,o
2,你好，我之後要出國旅遊，所以想要調高信用卡的額度,"[信用卡, 存款帳戶, 數位金融 - 網路銀行]",信用卡,o
3,您好我有三倍卷綁定信用卡的問題想要請教,"[信用卡, 貸款, 數位金融 - 網路銀行]",信用卡,o
4,我想要透過信用卡捐款給慈濟，想問一下要怎麼捐,"[信用卡, 存款帳戶, 數位金融 - 網路銀行]",信用卡,o
5,您好，我想要更改信用卡帳單寄送地址,"[信用卡, 貸款, 綜合對帳單]",信用卡,o
6,我想要問一下信用卡機場接送的問題,"[信用卡, 存款帳戶, 數位金融 - 網路銀行]",信用卡,o
7,您好我想要查詢本期信用卡帳單金額,"[信用卡, 貸款, 基金投資]",信用卡,o
8,我想要看一下我的信用卡繳款紀錄,"[行動支付 - Hami Pay, 信用卡, 貸款]",信用卡,o
9,我想要查一下我這個月的信用卡可用額度,"[信用卡, 貸款, 存款帳戶]",信用卡,o


In [None]:
import glob
from scipy.io.wavfile import read
import speech_recognition as sr
import pandas as pd


# Transcribe every recording, classify the transcription with the three-model ensemble and
# record whether the expected category (taken from the file name) is among the top three.
# Relies on `model`, `modelLSTM` and `modelCNN` having been loaded by the earlier cells.
rowsList = []
recognizer = sr.Recognizer()  # created once and reused for every file
for filename in glob.glob('模擬人話語音檔\\*.wav'):
    with sr.WavFile(filename) as source:  # read the wav file
        audio = recognizer.record(source)
    try:
        # Offline recognition; the online alternative used in earlier experiments was
        # recognizer.recognize_google(audio, language="zh-TW").
        customerText = recognizer.recognize_sphinx(audio, language="zh_cn")
    except (LookupError, sr.UnknownValueError):
        # Without a transcription there is nothing to classify. Skip the file instead of
        # reusing the previous file's text (or hitting an undefined name on the first file).
        print("Could not understand audio")
        continue

    # Expected category = file name without its trailing " <number>.wav".
    base_name = re.split(r'[\\/]', filename)[-1]
    correctCategory = re.split(r' \d+\.wav', base_name)[0]

    user_sentences = customerText

    # Drop everything except digits, ASCII letters and CJK ideographs, then lower-case.
    reg = "[^0-9A-Za-z\u4e00-\u9fa5]"
    user_sentences_without_punctuation = re.sub(reg, '', user_sentences).lower()

    # Segment with jieba; each token is prefixed with a space, so the string starts with a blank.
    words = jieba.cut(user_sentences_without_punctuation, cut_all=False)
    user_sentences_split = ''.join(' ' + word for word in words)

    # Encode the sentence as word ids and pad/truncate it to max_length.
    user_sequences = tokenizer.texts_to_sequences([user_sentences_split])
    user_padded = pad_sequences(user_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

    # Three most probable labels from each network. Pairs are (probability, class id),
    # so heapq.nlargest ranks by probability first.
    top_three_by_model = []
    for classifier in (model, modelLSTM, modelCNN):
        sentences_probabilities = classifier.predict(user_padded)
        sentences_top_three = heapq.nlargest(
            3, zip(sentences_probabilities[0], range(len(sentences_probabilities[0]))))
        top_three_by_model.append(
            [label_index_dict[class_index] for _probability, class_index in sentences_top_three])
    top_three_nn, top_three_lstm, top_three_cnn = top_three_by_model

    # Ensemble: interleave the rankings rank by rank in the order LSTM, CNN, NN,
    # drop repeats (first occurrence wins) and keep the first three labels.
    temp = [
        ranked_labels[rank]
        for rank in range(3)
        for ranked_labels in (top_three_lstm, top_three_cnn, top_three_nn)
    ]
    top_three_ensemble = list(dict.fromkeys(temp))[:3]

    # 'O' when the expected category is among the three ensemble labels, 'X' otherwise.
    curPredict = 'O' if correctCategory in top_three_ensemble else 'X'

    row = [customerText, correctCategory, top_three_ensemble[0], top_three_ensemble[1], top_three_ensemble[2], curPredict]
    rowsList.append(row)
    print(row)


['您 跑 我該 被 公證 別的 累計 改名字 人 自然人 扣稅 啟 沒有 圖 上', 'ATM相關服務', '存款帳戶', '信託', '數位金融 - 網路ATM', 'X']
['您 跑 我 想 要 舊 了 無 卡 提款 登 服務', 'ATM相關服務', 'ATM相關服務', '數位金融 - 網路ATM', '存款帳戶', 'O']
['我該 金融卡 不想 行動 有 了 請 保固 掛失', 'ATM相關服務', '數位金融 - 行動銀行', '基金投資', '數位金融 - LINE個人化服務', 'X']
['我 想 要 問卷 嗎 留 金等 繳交 非 持續 交付 手續費 嗎', 'MyBill 輕鬆繳', '存款帳戶', '基金投資', '信託', 'X']
['您 跑 我該 信用卡 遺失 了 想 要 辦 掛失', '信用卡', '貸款', '數位金融 - 行動銀行', '數位金融 - 數位存款帳戶', 'X']
['我該 信用卡 留 代扣 划 找 要 利率 單筆 掛失', '信用卡', '信用卡', '數位金融 - 網路銀行', '數位金融 - 行動銀行', 'O']
['您 跑 我 之後 要 註 過 旅遊 做 影響 要 舊 當 信用卡 登 額度', '信用卡', '信用卡', '貸款', '數位金融 - 網路銀行', 'O']
['您 跑 我 有 三個 一致 當 定期 用 卡該 問題 想 要 績效', '信用卡', '存款帳戶', '貸款', '數位金融 - 網路ATM', 'X']
['我 想 要 超過 信用卡 捐款 累計 記 想 我 一下 要 怎麼 支援', '信用卡', '存款帳戶', '信用卡', '數位金融 - 行動銀行', 'O']
['您 跑 我 想 要 跟 該 信用卡 帳單 寄送 定期', '信用卡', '貸款', '信用卡', '數位金融 - 數位存款帳戶', 'O']
['我 想 要 我 一下 信用卡 機場 接送 的 問題', '信用卡', '信用卡', '存款帳戶', '數位金融 - 網路銀行', 'O']
['您 跑 我 想 要 查詢 分期 信用卡 帳單 金額', '信用卡', '信用卡', '基金投資', '數位金融 - 網路銀行', 'O']
['我 想 要 開立 一下 我該 信用卡 繳款單 記錄', '信用卡', '數位金融 

In [None]:
# Column order matches the per-file rows collected in rowsList:
# transcription, expected category, the three ensemble labels, and the O/X verdict.
column_names = ["QUESTION(TRANSCRIPTION)", "CORRECT_ANSWER", "PREDICT_ANSWER1", "PREDICT_ANSWER2", "PREDICT_ANSWER3", "CORRECTORNOT"]
audioAccuracyDf = pd.DataFrame(rowsList, columns = column_names)

In [None]:
# Hand-picked row labels to display, shown with Chinese column headers.
showcase_rows = [7, 5, 21, 0, 24, 34, 35, 46]
display_headers = {
    "QUESTION(TRANSCRIPTION)": "語音轉換過的句子",
    "CORRECT_ANSWER": "正確分類",
    "PREDICT_ANSWER1": "預測分類1",
    "PREDICT_ANSWER2": "預測分類2",
    "PREDICT_ANSWER3": "預測分類3",
    "CORRECTORNOT": "分類是否正確",
}
audioAccuracyDf.loc[showcase_rows, :].rename(columns=display_headers)

In [None]:
# Share of rows marked "O" (expected category found among the three predictions).
verdict_shares = audioAccuracyDf['CORRECTORNOT'].value_counts(normalize=True)
print('準確率: %s' % (verdict_shares["O"],))

In [None]:
# 準確率(文本): 93.55
# 準確率(google api): 0.9032258064516129 0.9193548387096774
# 準確率(離線版 中文辭典): 0.41935483870967744
# 準確率(離線版 自建辭典 350): 0.6774193548387096
# 準確率(離線版 自建辭典 500): 0.6129032258064516
# 準確率(離線版 自建辭典 650): 0.5967741935483871
# 準確率(離線版 自建辭典 800): 0.6451612903225806
# 準確率(離線版 自建辭典 900): 0.7258064516129032
# 準確率(離線版 自建辭典 1050): 0.7096774193548387 0.6935483870967742

In [None]:
# List the recordings whose expected category was not among the three predicted labels.
audioAccuracyDf.loc[audioAccuracyDf['CORRECTORNOT'] != 'O']