# 导入相关库

In [37]:
import numpy as np
import pandas as pd
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
import jieba
import pickle

# 加载模型 SMP2018_model(F1_86)

In [38]:
# 加载 pickle 对象的函数
def load_obj(name ):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)
    
# 输入模型的最终单句长度
max_cut_query_lenth = 26

# 加载查询词汇和对应 ID 的字典
word_2_index_dict = load_obj('word_2_index_dict')
# 加载模型输出 ID 和对应标签（种类）的字典
index_2_label_dict = load_obj('index_2_label_dict')
# 加载模型结构
model_structure_json = load_obj('model_structure_json')
model = model_from_json(model_structure_json)
# 加载模型权重
model.load_weights('SMP2018_GlobalAveragePooling1D_model(F1_86).h5')

# 使用模型的函数

In [39]:
def query_2_label(query_sentence):
    '''
    input query: "从中山到西安的汽车。"
    return label: "bus"
    '''
    x_input = []
    # 分词 ['从', '中山', '到', '西安', '的', '汽车', '。']
    query_sentence_list = list(jieba.cut(query_sentence))
    # 序列化 [54, 717, 0, 8, 0, 0, 1, 0, 183, 2]
    x = [word_2_index_dict.get(w, 0) for w in query_sentence]
    # 填充  array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    #      0,   0,   0,  54, 717,   0,   8,   0,   0,   1,   0, 183,   2]], dtype=int32)
    x_input.append(x)
    x_input = pad_sequences(x_input, maxlen=max_cut_query_lenth)
    # 预测
    y_hat = model.predict(x_input)
    # 取最大值所在的序号 11
    pred_y_index = np.argmax(y_hat)
    # 查找序号所对应标签（类别）
    label = index_2_label_dict[pred_y_index]
    return label

# 使用例子

In [49]:
query_sentence = '狐臭怎么治？'

print(query_2_label(query_sentence))

# 对 2299 条数据进行预测演示

## 获取数据

In [51]:
def get_json_data(path):
    # read data
    data_df = pd.read_json(path)
    # change row and colunm
    data_df = data_df.transpose()
    # change colunm order
    data_df = data_df[['query', 'label']]
    return data_df

In [52]:
data_df = get_json_data(path="../data/train.json")

In [53]:
data_df.describe()

Unnamed: 0,query,label
count,2299,2299
unique,2299,31
top,还是想知道你能做些什么,chat
freq,1,455


## 查看前 10 条数据 

In [54]:
data_df.head(10)

Unnamed: 0,query,label
0,今天东莞天气如何,weather
1,从观音桥到重庆市图书馆怎么走,map
2,鸭蛋怎么腌？,cookbook
3,怎么治疗牛皮癣,health
4,唠什么,chat
5,阳澄湖大闸蟹的做法。,cookbook
6,昆山大润发在哪里,map
7,红烧肉怎么做？嗯？,cookbook
8,南京到厦门的火车票,train
9,6的平方,calc


## 模型预测，并查看前 10 条数据

In [55]:
data_df['model_prediction_label'] = data_df['query'].apply(query_2_label)

data_df.head(10)

Unnamed: 0,query,label,model_prediction_label
0,今天东莞天气如何,weather,datetime
1,从观音桥到重庆市图书馆怎么走,map,map
2,鸭蛋怎么腌？,cookbook,cookbook
3,怎么治疗牛皮癣,health,chat
4,唠什么,chat,chat
5,阳澄湖大闸蟹的做法。,cookbook,cookbook
6,昆山大润发在哪里,map,chat
7,红烧肉怎么做？嗯？,cookbook,cookbook
8,南京到厦门的火车票,train,bus
9,6的平方,calc,calc
