In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import numpy as np
import os
import json
import random
    
# Load the pickled vocabulary object from the current working directory.
# It is used below for its length (vocabulary size) and its `stoi` lookup.
# NOTE(review): pickle.load can execute arbitrary code during unpickling;
# only point this at a words.pkl file from a trusted source.
words_path = os.path.join(os.getcwd(), "words.pkl")
with open(words_path, mode='rb') as vocab_file:
    words = pickle.load(vocab_file)
    
class TextRCNN(nn.Module):
    """Text classifier: embedding -> bidirectional LSTM -> max-over-time pooling -> linear.

    The embedding of every token is concatenated with the BiLSTM output for
    that token, passed through ReLU, max-pooled over the sequence dimension
    and finally projected to `output_size` logits.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_size: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        output_size: number of output logits (classes).
        dropout: probability used both for the LSTM's `dropout` argument and
            for the dropout layer applied before the final linear layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_size, num_layers, output_size, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # batch_first=True only changes the layout of the LSTM input/output;
        # the hidden and cell states still carry the batch on dimension 1.
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.dropout = nn.Dropout(p=dropout)
        # Width of [embedding ; forward LSTM ; backward LSTM] per token.
        fused_width = embedding_dim + 2 * hidden_size
        self.fc = nn.Linear(fused_width, output_size)

    def forward(self, x):
        """Return logits of shape [batch, output_size] for token ids `x` of shape [batch, seq_len]."""
        # [batch, seq_len] -> [batch, seq_len, embedding_dim]
        embedded = self.embedding(x)
        # recurrent: [batch, seq_len, hidden_size * 2]; the final (h, c) states are not used.
        recurrent, _final_state = self.lstm(embedded)
        # Concatenate embedding and BiLSTM features, then apply the non-linearity:
        # [batch, seq_len, embedding_dim + hidden_size * 2]
        fused = F.relu(torch.cat([embedded, recurrent], dim=2))
        # max_pool1d pools over the last dimension, so move seq_len there:
        # [batch, embedding_dim + hidden_size * 2, seq_len]
        channels_first = fused.transpose(1, 2)
        # Max over the whole sequence, then drop the length-1 dimension:
        # [batch, embedding_dim + hidden_size * 2]
        pooled = F.max_pool1d(channels_first, kernel_size=channels_first.size(2)).squeeze(-1)
        # [batch, output_size]
        return self.fc(self.dropout(pooled))
        
# Rebuild the network with the hyper-parameters used for the checkpoint and
# restore its trained weights from the current working directory.
model = TextRCNN(
    vocab_size=len(words),
    embedding_dim=300,
    hidden_size=128,
    num_layers=2,
    output_size=16,
)
# Despite the ".h5" extension, this file is read with torch.load.
model_path = os.path.join(os.getcwd(), "model.h5")
# map_location="cpu" lets a checkpoint that was saved from GPU tensors be
# restored on a machine without CUDA; the model in this notebook is never
# moved off the CPU, so the loaded weights end up on the CPU either way.
model.load_state_dict(torch.load(model_path, map_location="cpu"))


<All keys matched successfully>

In [2]:
from pyhanlp import HanLP

# Shared HanLP segmenter used by sentence_segment() below.
# NOTE(review): enableCustomDictionaryForcing(True) presumably makes entries
# from the user's custom dictionary take priority during segmentation;
# confirm against the HanLP documentation.
segment = HanLP.newSegment()
segment = segment.enableCustomDictionaryForcing(True)


# Tokenisation. Original author's note: movie names, actor names and rating
# numbers need to be converted into the tags nm, nnt and ng.
def sentence_segment(sentence):
    """Segment `sentence` and return the token list fed to the vocabulary lookup.

    Terms whose part-of-speech tag is 'nnt', 'nm' or 'ng' are replaced by the
    tag itself, terms tagged 'm' are replaced by 'x', and every other term is
    split into its individual characters.

    Both the raw segmentation and the resulting token list are printed.

    Args:
        sentence: text passed to the module-level `segment.seg`.

    Returns:
        list of str: placeholder tags and single characters, in sentence order.
    """
    # Part-of-speech tag -> placeholder token emitted in place of the term.
    placeholders = {'nnt': 'nnt', 'nm': 'nm', 'ng': 'ng', 'm': 'x'}

    word_nature = segment.seg(sentence)
    print(word_nature)

    sentence_words = []
    for term in word_nature:
        placeholder = placeholders.get(str(term.nature))
        if placeholder is None:
            # Not a tracked tag: fall back to character-level tokens.
            sentence_words.extend(list(term.word))
        else:
            sentence_words.append(placeholder)

    print(sentence_words)
    return sentence_words

def bow(sentence, words, show_detail = True):
    """Convert `sentence` into a batch of one sequence of vocabulary indices.

    Args:
        sentence: text to tokenise with `sentence_segment`.
        words: vocabulary object; each token is looked up in `words.stoi`.
        show_detail: accepted for interface compatibility; not used here.

    Returns:
        torch.LongTensor of shape [1, number_of_tokens].
    """
    lookup = words.stoi
    token_ids = [lookup[token] for token in sentence_segment(sentence)]
    # unsqueeze(0) adds the leading batch dimension.
    return torch.LongTensor(token_ids).unsqueeze(0)

def predict_class(sentence, model):
    """Run `model` on `sentence` and return its most probable class.

    The sentence is encoded with `bow` using the module-level `words`
    vocabulary, the model is switched to eval mode, and the logits are
    turned into probabilities with a softmax over dimension 1. The raw
    logits, the winning probability/index and the final result are printed.

    Args:
        sentence: text to classify.
        model: callable module providing `eval()`; called with the encoded batch.

    Returns:
        A one-element list `[{'intent': index, 'prob': probability}]` where
        both values are numpy scalars taken from the first batch row.
        `intent` is the raw class index, not a label name.
    """
    encoded = bow(sentence, words, False)

    model.eval()
    with torch.no_grad():
        outputs = model(encoded)
    print('outputs:{}'.format(outputs))

    # Probability and index of the highest-scoring class for each batch row.
    class_probs = F.softmax(outputs, dim=1)
    predicted_prob, predicted_index = class_probs.max(dim=1)
    print('softmax_prob:{}'.format(predicted_prob))
    print('softmax_index:{}'.format(predicted_index))

    top_index = predicted_index.detach().numpy()[0]
    top_prob = predicted_prob.detach().numpy()[0]
    results = [{'intent': top_index, 'prob': top_prob}]
    print('result:{}'.format(results))
    return results
 
def get_response(predict_result):
    """Return the 'intent' value of the first entry in `predict_result`.

    Args:
        predict_result: non-empty sequence of dicts that contain an 'intent' key.
    """
    top_prediction = predict_result[0]
    return top_prediction['intent']

def predict(text):
    """Classify `text` with the module-level `model` and return the predicted intent.

    Args:
        text: sentence to classify.

    Returns:
        The 'intent' value that `get_response` extracts from the
        `predict_class` result.
    """
    return get_response(predict_class(text, model))
# Smoke test: classify one sample question and print the predicted intent index.
sample_question = "成龙主演过的电影有哪些"
print(predict(sample_question))

[成龙/nnt, 主/ag, 演过/v, 的/ude1, 电影/n, 有/vyou, 哪些/ry]
['nnt', '主', '演', '过', '的', '电', '影', '有', '哪', '些']
outputs:tensor([[ -9.8885,  -3.7893,  -6.1609,  -4.4332,   5.9214,  -4.1137,   7.2106,
          13.9162,   7.0675,   0.4421,   7.2791,   6.9780,   4.8577,  -5.4598,
         -10.0143,  -9.8310]])
softmax_prob:tensor([0.9950])
softmax_index:tensor([7])
result:[{'intent': 7, 'prob': 0.99500436}]
7
