In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import os

    
# Load the pickled artefacts that sit next to the notebook (current working directory).
# NOTE: pickle.load can execute arbitrary code stored in the file, so only
# load files that come from a trusted source.

# `words` — its length is the input width of the classifier.
words_path = os.path.join(os.getcwd(), "words.pkl")
with open(words_path, "rb") as f_words:
    words = pickle.load(f_words)

# `classes` — its length is the output width of the classifier.
classes_path = os.path.join(os.getcwd(), "classes.pkl")
with open(classes_path, "rb") as f_classes:
    classes = pickle.load(f_classes)

# `classes_index` — a mapping; presumably class index -> label (confirm against
# the training notebook that wrote it).
classes_index_path = os.path.join(os.getcwd(), "classes_index.pkl")
with open(classes_index_path, "rb") as f_classes_index:
    classes_index = pickle.load(f_classes_index)

# Shallow copy of the mapping, same keys and same values.
index_classes = {key: value for key, value in classes_index.items()}

print(f'index_classes:{index_classes}')

class classifyModel(nn.Module):
    """Three-layer feed-forward classifier.

    The input width is ``len(words)`` and the output width is ``len(classes)``;
    both are read from the module-level ``words`` / ``classes`` objects at the
    moment an instance is constructed.
    """

    def __init__(self):
        super().__init__()
        input_width = len(words)
        output_width = len(classes)
        # The attribute name ``model`` and the position of each layer determine
        # the state-dict keys (model.0.*, model.3.*, model.6.*), so keep both
        # stable if a saved checkpoint must remain loadable.
        stack = [
            nn.Linear(input_width, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, output_width),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the last layer's output."""
        return self.model(x)
        
# Build the network and restore its trained parameters from "model.h5" in the
# current working directory.
model = classifyModel()
model_path = os.path.join(os.getcwd(), "model.h5")
# map_location="cpu": the model above is created on the CPU and never moved, so
# remap the stored tensors to the CPU as well. Without this, a checkpoint that
# was saved from a CUDA device fails to load on a machine without CUDA.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location="cpu"))

index_classes:{0: 'nm 评分', 1: 'nm 上映时间', 2: 'nm 类型', 3: 'nm 简介', 4: 'nm 演员列表', 5: 'nnt 介绍', 6: 'nnt ng电影作品', 7: 'nnt 电影作品', 8: 'nnt 参演评分大于 x', 9: 'nnt 参演评分小于 x', 10: 'nnt 电影类型', 11: 'nnt nnr合作电影列表', 12: 'nnt 电影数量', 13: 'nnt 出生日期', 14: '评分大于x电影', 15: '评分大于x的ng类型电影'}


<All keys matched successfully>

In [2]:
from pyhanlp import HanLP

# Module-level HanLP segmenter used by sentence_segment().
# Custom-dictionary forcing is switched on — presumably so that entries from the
# user dictionary take priority over the default segmentation; confirm against
# the HanLP documentation.
segment = HanLP.newSegment()
segment = segment.enableCustomDictionaryForcing(True)


# Word segmentation: movie names, actor names and rating numbers have to be
# replaced by placeholder tokens (nm, nnt, ng, x).
def sentence_segment(sentence):
    """Segment ``sentence`` and return its tokens with tagged terms replaced.

    Each term produced by the module-level ``segment`` is inspected: if the
    string form of its nature is ``nnt``, ``nm`` or ``ng`` the tag itself is
    emitted, ``m`` is emitted as ``x``, and every other term contributes its
    own word unchanged. Both the raw segmentation and the resulting token list
    are printed.

    Returns:
        list: one token per term, in segmentation order.
    """
    # nature tag -> placeholder token written into the output
    placeholder_for = {'nnt': 'nnt', 'nm': 'nm', 'ng': 'ng', 'm': 'x'}

    word_nature = segment.seg(sentence)
    print(word_nature)

    sentence_words = []
    for term in word_nature:
        tag = str(term.nature)
        if tag in placeholder_for:
            sentence_words.append(placeholder_for[tag])
        else:
            sentence_words.append(term.word)

    print(sentence_words)
    return sentence_words

def bow(sentence, words, show_detail = True):
    """Encode ``sentence`` as a binary bag-of-words vector over ``words``.

    The sentence is tokenised with ``sentence_segment``; position ``i`` of the
    vector is 1 when ``words[i]`` occurs among the tokens and 0 otherwise.

    Args:
        sentence: raw text to encode.
        words: vocabulary; the vector has one slot per entry, in this order.
        show_detail: when true, print ``found in bag:<word>`` for every
            vocabulary entry that was matched (and only for those).

    Returns:
        list: a one-element list holding the 0/1 vector, i.e. a batch of one.
    """
    # Set membership replaces the nested token-by-vocabulary scan.
    present = set(sentence_segment(sentence))

    # Bag of words.
    bag = []
    for w in words:
        hit = w in present
        bag.append(1 if hit else 0)  # 1: the vocabulary word occurs in the sentence
        # Report only real matches; previously this message was printed for
        # every vocabulary word, whether or not it matched.
        if hit and show_detail:
            print("found in bag:{}".format(w))
    return [bag]

def predict_class(sentence, model):
    """Classify ``sentence`` with ``model`` and return the top prediction.

    The sentence is encoded with ``bow`` over the module-level ``words``
    vocabulary, passed through ``model`` in eval mode without gradient
    tracking, and softmax-normalised. Raw outputs, the winning probability,
    the winning index and the final result are printed along the way.

    Returns:
        list: a single dict ``{'intent': <index>, 'prob': <probability>}``;
        both values are numpy scalars taken from the first row of the batch.
    """
    features = torch.FloatTensor(bow(sentence, words, False))

    model.eval()
    with torch.no_grad():
        outputs = model(features)
    print('outputs:{}'.format(outputs))

    # Probability and index of the most likely class for each row.
    probabilities = F.softmax(outputs, dim=1)
    predicted_prob, predicted_index = torch.max(probabilities, dim=1)
    print('softmax_prob:{}'.format(predicted_prob))
    print('softmax_index:{}'.format(predicted_index))

    # 'intent' carries the class index itself; the module-level ``index_classes``
    # mapping can be used to turn it into a label.
    top_index = predicted_index.detach().numpy()[0]
    top_prob = predicted_prob.detach().numpy()[0]
    results = [{'intent': top_index, 'prob': top_prob}]
    print('result:{}'.format(results))
    return results
 
def get_response(predict_result):
    """Return the ``'intent'`` value of the first entry in ``predict_result``."""
    top_prediction = predict_result[0]
    return top_prediction['intent']

def chatbot_response(text):
    """Classify ``text`` with the module-level ``model`` and return the response.

    The prediction from ``predict_class`` is handed to ``get_response`` and
    that function's return value is passed back to the caller.
    """
    return get_response(predict_class(text, model))
# Example run: classify one sample query and print the value returned by chatbot_response.
print(chatbot_response("成龙的动作影片"))

[成龙/nnt, 的/ude1, 动作影片/ng]
['nnt', '的', 'ng']
outputs:tensor([[  1.8995,  -9.4150,   1.0707, -10.6612,  -5.5105,   5.0239,  14.2964,
          -5.4456,   0.4897,   4.2912,   3.6186,  -3.4077,  -0.4035,   5.1586,
          -8.0579,  10.5380]])
softmax_prob:tensor([0.9769])
softmax_index:tensor([6])
result:[{'intent': 6, 'prob': 0.9769448}]
6
