In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import numpy as np
import os
import json
import random
    
# Load the pickled vocabulary object that sits next to the notebook's working directory.
# NOTE(review): pickle.load executes arbitrary code from the file — only use a trusted words.pkl.
words_path = os.path.join(os.getcwd(), "words.pkl")
with open(words_path, mode="rb") as vocab_file:
    words = pickle.load(vocab_file)
    
# Build the classification model
class TextRNN(nn.Module):
    """Text classifier: embedding -> multi-layer bidirectional LSTM -> linear layer.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each embedding vector (also the LSTM input size).
        hidden_size: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        output_size: number of output units of the final linear layer.
        dropout: dropout value handed to ``nn.LSTM``.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        # batch_first=True only affects the input and output tensors; the hidden
        # and cell states still carry the batch in their second dimension.
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        # Both directions are concatenated, hence twice the hidden size.
        self.fc = nn.Linear(in_features=2 * hidden_size, out_features=output_size)

    def forward(self, x):
        """Map token indices ``x`` of shape (batch, seq_len) to scores of shape (batch, output_size)."""
        # (batch, seq_len) -> (batch, seq_len, embedding_dim)
        embedded = self.embedding(x)
        # sequence_out: (batch, seq_len, hidden_size * 2); the returned (h, c)
        # states, each (num_layers * 2, batch, hidden_size), are not used.
        sequence_out, _ = self.lstm(embedded)
        # Classify from the features at the last time step.
        # NOTE(review): for the reverse direction this position has only seen the
        # final token; left unchanged so existing checkpoints keep their behaviour.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)
        
# Rebuild the network and restore its trained weights.
# The hyperparameters are passed by keyword so it is obvious what each number is;
# load_state_dict (strict by default) raises if they do not fit the checkpoint.
model = TextRNN(
    vocab_size=len(words),
    embedding_dim=300,
    hidden_size=128,
    num_layers=2,
    output_size=16,
)
model_path = os.path.join(os.getcwd(), "model.h5")
# map_location="cpu" lets a checkpoint that was saved from a CUDA device load on a
# CPU-only machine; the rest of this notebook feeds CPU tensors and calls .numpy()
# on the results, so the model is expected to live on the CPU anyway.
model.load_state_dict(torch.load(model_path, map_location="cpu"))


<All keys matched successfully>

In [13]:
from pyhanlp import HanLP

# Create a HanLP segmenter, then turn on custom-dictionary forcing on it.
segment = HanLP.newSegment()
segment = segment.enableCustomDictionaryForcing(True)


# Tokenize; per the original note, movie titles, actor names and rating numbers
# have to be turned into nm, nnt, ng.
def sentence_segment(sentence):
    """Segment ``sentence`` and return the token list used for vocabulary lookup.

    Terms whose nature is ``nnt``, ``nm`` or ``ng`` are replaced by that tag,
    terms whose nature is ``m`` are replaced by ``'x'``, and every other term is
    split into its individual characters. Both the raw segmentation and the
    resulting token list are printed.
    """
    # Natures that collapse to a single placeholder token.
    placeholder_for = {'nnt': 'nnt', 'nm': 'nm', 'ng': 'ng', 'm': 'x'}

    terms = segment.seg(sentence)
    print(terms)

    tokens = []
    for term in terms:
        nature = str(term.nature)
        if nature in placeholder_for:
            tokens.append(placeholder_for[nature])
        else:
            # No placeholder for this nature: fall back to character-level tokens.
            tokens.extend(term.word)
    print(tokens)
    return tokens

def bow(sentence, words, show_detail = True):
    """Convert ``sentence`` into a (1, seq_len) LongTensor of vocabulary indices.

    The sentence is tokenized with ``sentence_segment`` and each token is looked
    up in ``words.stoi``. ``show_detail`` is accepted but not used.
    """
    token_ids = []
    for token in sentence_segment(sentence):
        token_ids.append(words.stoi[token])
    # Add a leading batch dimension of size 1.
    return torch.LongTensor(token_ids).unsqueeze(0)

def predict_class(sentence, model):
    """Run ``model`` on ``sentence`` and return its most probable class.

    Returns a one-element list ``[{'intent': index, 'prob': probability}]`` where
    both values are numpy scalars taken from the first row of the batch. The
    intent is the raw class index; no index-to-label mapping is applied here.
    The raw outputs, the top probability/index and the result are printed.
    """
    token_ids = bow(sentence, words, False)

    model.eval()  # put the model in evaluation mode before inference
    with torch.no_grad():
        outputs = model(token_ids)
    print('outputs:{}'.format(outputs))

    # Probability and index of the most likely class for each row.
    class_probs = F.softmax(outputs, dim=1)
    predicted_prob, predicted_index = class_probs.max(dim=1)
    print('softmax_prob:{}'.format(predicted_prob))
    print('softmax_index:{}'.format(predicted_index))

    top_intent = predicted_index.detach().numpy()[0]
    top_prob = predicted_prob.detach().numpy()[0]
    results = [{'intent': top_intent, 'prob': top_prob}]
    print('result:{}'.format(results))
    return results
 
def get_response(predict_result):
    """Return the ``'intent'`` value of the first entry in ``predict_result``."""
    first_entry = predict_result[0]
    return first_entry['intent']

def predict(text):
    """Classify ``text`` with the module-level ``model`` and return the predicted intent."""
    return get_response(predict_class(text, model))
# Smoke test: classify a single query and print the predicted class index.
sample_query = "张国荣"
print(predict(sample_query))

[张国荣/nnt]
['nnt']
outputs:tensor([[ 1.8594,  1.9003,  1.5639, -1.2812,  5.1077, -1.6340,  2.0733,  1.8545,
         -8.1699, -6.1456,  1.7393, -0.6705,  3.4907,  2.7885, -2.3077, -2.1625]])
softmax_prob:tensor([0.6521])
softmax_index:tensor([4])
result:[{'intent': 4, 'prob': 0.6521061}]
4
