In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import numpy as np
import os

# Load the ordered list of knowledge-point labels; the position of a label in
# this list is the index of the matching model output.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load pickles produced by a trusted source.
knowledge_points_path = os.path.join(os.getcwd(), "knowledge_points.pkl")
with open(knowledge_points_path, mode="rb") as f_knowledge_points:
    knowledge_points = pickle.Unpickler(f_knowledge_points).load()

knowledge_points

['“重农抑商”政策',
 '不完全显性',
 '与细胞分裂有关的细胞器',
 '中央官制——三公九卿制',
 '中心体的结构和功能',
 '人体免疫系统在维持稳态中的作用',
 '人体水盐平衡调节',
 '人体的体温调节',
 '人口增长与人口问题',
 '人口迁移与人口流动',
 '人工授精、试管婴儿等生殖技术',
 '伴性遗传',
 '体液免疫的概念和过程',
 '免疫系统的功能',
 '免疫系统的组成',
 '兴奋在神经元之间的传递',
 '兴奋在神经纤维上的传导',
 '内环境的稳态',
 '内质网的结构和功能',
 '农业区位因素',
 '减数分裂与有丝分裂的比较',
 '减数分裂的概念',
 '劳动就业与守法经营',
 '器官移植',
 '地球所处的宇宙环境',
 '地球的内部圈层结构及特点',
 '地球的外部圈层结构及特点',
 '地球运动的地理意义',
 '地球运动的基本形式',
 '垄断组织的出现',
 '培养基与无菌技术',
 '基因工程的原理及技术',
 '基因工程的概念',
 '基因的分离规律的实质及应用',
 '基因的自由组合规律的实质及应用',
 '复等位基因',
 '夏商两代的政治制度',
 '太阳对地球的影响',
 '工业区位因素',
 '拉马克的进化学说',
 '文艺的春天',
 '核糖体的结构和功能',
 '海峡两岸关系的发展',
 '液泡的结构和功能',
 '清末民主革命风潮',
 '溶酶体的结构和功能',
 '激素调节',
 '生命活动离不开细胞',
 '生态系统的营养结构',
 '生物工程技术',
 '生物性污染',
 '生物技术在其他方面的应用',
 '皇帝制度',
 '社会主义市场经济的伦理要求',
 '社会主义是中国人民的历史性选择',
 '神经调节和体液调节的比较',
 '第三产业的兴起和“新经济”的出现',
 '组成细胞的化合物',
 '组成细胞的化学元素',
 '细胞大小与物质运输的关系',
 '细胞有丝分裂不同时期的特点',
 '细胞的多样性和统一性',
 '群落的结构',
 '胚胎移植',
 '蛋白质的合成',
 '血糖平衡的调节',
 '走进细胞',
 '选官、用官制度的变化',
 '遗传的分子基础',
 '遗传的细胞基础',
 '避孕的原理和方法',
 '郡县制',
 '高尔基体的结构和功能']

In [5]:
# Load the pickled vocabulary object; later cells use `len(words)` for the
# embedding size and `words.stoi` to map tokens to indices.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load pickles produced by a trusted source.
words_path = os.path.join(os.getcwd(), "words.pkl")
with open(words_path, mode="rb") as f_words:
    words = pickle.Unpickler(f_words).load()
words

<torchtext.vocab.Vocab at 0x26573d209b0>

In [6]:
# Build the classification model
class TextCNN(nn.Module):
    """Convolutional text classifier with parallel kernels and max-over-time pooling.

    Each token index is embedded, every kernel height in ``filter_size`` is
    convolved over the token axis, each feature map is max-pooled over the
    whole sequence, and the pooled features are concatenated and projected to
    ``output_size`` logits.
    """

    def __init__(self, vocab_size, embedding_dim, output_size, filter_num=100, filter_size=(3,4,5), dropout=0.5):
        '''
        vocab_size: number of entries in the vocabulary
        embedding_dim: size of each token embedding
        output_size: number of output classes (logits)
        filter_num: number of filters per kernel height
        filter_size: kernel heights (in tokens); every kernel spans the full
            embedding width, and there are filter_num kernels per height
        dropout: dropout probability applied to the pooled features
        '''
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Conv2d(in_channels, out_channels, kernel_size); stride defaults to 1 and padding to 0.
        self.convs = nn.ModuleList([nn.Conv2d(1, filter_num, (k, embedding_dim)) for k in filter_size])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(filter_num * len(filter_size), output_size)
        # Plain attribute (not a parameter/buffer), so saved state dicts still load.
        # The unpadded convolutions need at least this many tokens.
        self.min_seq_len = max(filter_size)

    def forward(self, x):
        """Return logits of shape (batch, output_size) for index tensor x of shape (batch, seq_len)."""
        x = self.embedding(x)  # (N, H, W) = (batch, seq_len, embedding_dim)
        # Sequences shorter than the tallest kernel would make Conv2d raise
        # ("kernel size can't be greater than actual input size"), so append
        # all-zero rows on the token axis until the tallest kernel fits.
        # NOTE(review): zero rows are used as neutral filler; the padding
        # convention used at training time is not visible here.
        missing = self.min_seq_len - x.size(1)
        if missing > 0:
            x = F.pad(x, (0, 0, 0, missing))
        x = x.unsqueeze(1)  # (N, C=1, H, W)
        # One feature map per kernel height k: (N, filter_num, H - k + 1)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        # Max-over-time pooling: the pooling window covers the whole map -> (N, filter_num)
        x = [F.max_pool1d(output, output.shape[2]).squeeze(2) for output in x]
        x = torch.cat(x, 1)  # (N, filter_num * len(filter_size))
        x = self.dropout(x)
        x = self.fc(x)
        return x
        
# Rebuild the architecture and restore the trained weights.
# The output size is taken from the label list (73 entries) so the logits stay
# aligned with `knowledge_points`; a mismatch with the checkpoint then fails
# loudly inside load_state_dict instead of mislabelling predictions.
model = TextCNN(len(words), 300, len(knowledge_points))
model_path = os.path.join(os.getcwd(), "model.h5")
# Inference below converts outputs with .numpy(), i.e. it runs on CPU, so map
# the checkpoint to CPU even if it was saved from a GPU.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model.load_state_dict(torch.load(model_path, map_location="cpu"))




<All keys matched successfully>

In [7]:
# The stop-word list is read from ../data/stopwords.txt, resolved against the
# current working directory.
parent_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
stopwords_path = os.path.join(parent_path,  'data', 'stopwords.txt')

# One stop word per line; surrounding whitespace (including the newline) is stripped.
with open(stopwords_path, 'r', encoding='utf-8') as f_read:
    stopwords_set = {line.strip() for line in f_read}

print('stop words len :{}'.format(len(stopwords_set)))

stop words len :859


In [8]:
import jieba
import re

def content_preprocess(content):
    """Clean a question text and split it into tokens.

    Every run of characters outside U+4E00..U+9FA5, and every occurrence of
    the literal "题目", is removed. The remaining text is segmented with jieba
    (accurate mode) and tokens found in ``stopwords_set`` are discarded.

    Returns the surviving tokens as a list, in their original order.
    """
    chinese_only = re.sub("[^\u4e00-\u9fa5]+|题目", "", content)
    kept_tokens = []
    for token in jieba.cut(chinese_only, cut_all=False):
        if token in stopwords_set:
            continue
        kept_tokens.append(token)
    return kept_tokens

# Run the preprocessing once on a sample question; the result is kept in `segment`.
segment = content_preprocess('下表是美国、欧共体和日本的国民经济占世界经济总量的比例表（单位：亿美元）。结合所学知识判断，对此分析不正确的是（   ）年份美国欧共体日本195634.88%15.85%4.26%197327.08%27.017%18.023%A. 福利政策的实施已然严重阻碍了美国经济发展B. 美、欧、日三足鼎立之势有力冲击着两极格局C. 越南战争已将美国经济拖入了‘滞胀”的轨道D. 欧、日经济的迅速发展威胁着美国的霸主地位题型')


Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\User\AppData\Local\Temp\jieba.cache
Loading model cost 0.659 seconds.
Prefix dict has been built successfully.


In [9]:
def bow(sentence):
    """Turn raw text into a batch-of-one tensor of vocabulary indices.

    Despite the name this is not a bag-of-words vector: the text is tokenised
    with ``content_preprocess`` and each token is looked up in ``words.stoi``,
    keeping token order.

    Returns a LongTensor of shape (1, number_of_tokens).
    """
    # NOTE(review): how unknown tokens are handled depends on `words.stoi`
    # (presumably it falls back to an <unk> index) - confirm.
    token_ids = []
    for token in content_preprocess(sentence):
        token_ids.append(words.stoi[token])
    return torch.LongTensor(token_ids).unsqueeze(0)

def predict_class(sentence, threshold=0.5):
    """Predict the multi-label indicator vector for one piece of text.

    The text is converted to an index tensor with ``bow``, passed through the
    global ``model`` in eval mode without gradient tracking, and each logit is
    squashed with a sigmoid. A label is switched on when its probability is
    strictly greater than ``threshold``.

    sentence: raw text to classify.
    threshold: probability cut-off for a positive label (default 0.5, the
        value that was previously hard-coded).

    Returns an integer numpy array of 0/1 flags with the same shape as the
    model output. The raw logits and the flags are also printed.
    """
    sentence_bag = bow(sentence)
    model.eval()  # disable dropout for inference
    with torch.no_grad():
        outputs = model(sentence_bag)
    print('outputs:{}'.format(outputs))
    # torch.sigmoid replaces the deprecated F.sigmoid; no .data is needed
    # because the outputs were computed under no_grad.
    probabilities = torch.sigmoid(outputs).numpy()
    predicts = (probabilities > threshold).astype(int)
    print('predict result:{}'.format(predicts))

    return predicts

def predict(text):
    """Public entry point: return whatever ``predict_class`` returns for ``text``."""
    return predict_class(text)

# Run the full pipeline on one sample history question; predict_class prints
# the raw logits and the thresholded 0/1 vector, which is kept in `result`.
result = predict('秦始皇统一六国后创制了一套御玺。如任命国家官员，则封印“皇帝之玺”；若任命四夷的官员，则用“天子之玺”；信玺用于对国内和四夷用兵事宜，行玺则为皇帝外巡时随身携带。材料不能说明（   ）A. 皇帝处于至高无上的地位B. 秦朝有内外两种系统处理国事C. 秦朝实行中央集权的体制D. 三公九卿制大大提升行政效率题型: 单选题|难度: 一般|使用次数: 0|纠错复制收藏到空间加入选题篮查看答案解析答案：D解析：本题要求选择否定项，据材料提到，秦始皇统一六国后创制了一套御玺，如任命国家官员，则封印“皇帝之玺”……，结合所学知识可知，这说明皇帝处于至高无上的地位，故A正确，排除。信玺和行玺的区别说明秦朝有内外两种系统处理国事，故B正确，排除。材料也说明秦朝实行中央集权的体制，故C正确，排除。材料未涉及三公九卿制大大提升行政效率，故D错误，符合题意')


outputs:tensor([[  5.4277, -11.6085, -12.3771,   5.1553, -14.5809, -16.3762, -12.2814,
         -13.0382, -10.7107,  -8.8357, -11.2915, -13.2646, -15.7242, -16.7242,
         -16.5250, -11.1445, -10.9181, -13.4590, -15.3357,  -9.9307, -11.5974,
         -10.8862,  -8.3890, -11.9590,  -8.9949, -12.6267, -12.4136,  -8.4198,
         -10.7091,  -7.1404, -11.0067, -12.2441, -12.0132,  -8.8051, -10.3733,
         -12.8393,   5.6295, -11.5690, -10.2945, -11.4761,  -5.0379, -14.8829,
          -4.5830, -15.2992,  -6.2327, -14.5351, -11.8961, -10.6002, -12.2793,
          -8.7982, -11.1525,  -8.9523,   5.9229,  -8.3157, -10.9583, -10.1933,
          -7.0526, -10.5578, -11.1429, -10.0483, -10.8888,  -9.9974, -11.8042,
         -12.3277, -12.1078, -12.9662, -15.0154,  -1.2440,  -9.5717, -11.5020,
         -11.6094,   5.5022, -15.0643]])
predict result:[[1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 



In [10]:
# Translate the 0/1 flags of the single sample in the batch back into label
# names: flag i corresponds to knowledge_points[i].
res = result[0]
for index, flag in enumerate(res):
    if flag == 1:
        print(knowledge_points[index])

“重农抑商”政策
中央官制——三公九卿制
夏商两代的政治制度
皇帝制度
郡县制
