- 模型应用
  - 模型加载
  - 模型测试集评估
    - 精确率
    - 召回率
    - F1


In [45]:
# 测试集评论数据测试

import time

import torch
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report

import dataset
import dictionary
import model
from init import config

# Evaluate the saved sentiment model on the held-out test split and report
# per-class and macro-averaged precision / recall / F1 together with the loss.

# Load everything the evaluation needs: stopword list, trained model, vocabulary.
stopwords = dataset.load_stopwords(config.stopwords_path)
# NOTE: from here on `model` and `dictionary` name the loaded instances rather than
# the imported modules; the `import` statements at the top of this cell rebind the
# module names whenever the cell is re-run.
model = model.SentimentAnalysisModel().load(config.model_pt_path).to(config.device)
print('test_model[{:s}]'.format(config.model_pt_path))
dictionary = dictionary.SentimentAnalysisDictionary().load(config.dictionary_pt_path)
print('dictionary[{:s}]'.format(config.dictionary_pt_path))

# Read the test split, tokenize it, and wrap it in the project dataset type.
test_contents, test_emotions = dataset.load_contents_labels(config.test_data_path)
test_tokens = dataset.tokenization(test_contents, stopwords)
test_dataset = dataset.SentimentAnalysisDataset(tokens=test_tokens, emotions=test_emotions,
                                                sequence_length=config.model_sequence_length,
                                                dictionary=dictionary, ).dataset

loss_fn = torch.nn.CrossEntropyLoss()
model.eval()
# tensors[0] is fed to the model as input, tensors[1] is used as the target labels.
data, targets = test_dataset.tensors[0].to(config.device), test_dataset.tensors[1].to(config.device)
# Pure inference: disable autograd so no graph is kept for the whole test set
# (the entire split goes through the model in a single forward pass).
with torch.no_grad():
    output = model(data)
    test_loss = loss_fn(output, targets).item()
predicted = output.argmax(dim=1).cpu().numpy()
labels = targets.cpu().numpy()
# target_names is materialised as a list instead of a dict view; zero_division=0
# matches the macro scores below and avoids undefined-metric warnings for classes
# that are never predicted.
# NOTE(review): assumes emotion2label's key order matches the sorted label ids - confirm.
print("每个类别的精确率和召回率：\n",
      classification_report(labels, predicted, target_names=list(dataset.emotion2label.keys()),
                            zero_division=0))

test_macro_precision = precision_score(labels, predicted, average='macro', zero_division=0)  # macro precision
test_macro_recall = recall_score(labels, predicted, average='macro', zero_division=0)  # macro recall
test_macro_f1 = f1_score(labels, predicted, average='macro', zero_division=0)  # macro F1
print('test_loss[{:f}]'.format(test_loss),
      'test_macro_precision[{:f}]'.format(test_macro_precision),
      'test_macro_recall[{:f}]'.format(test_macro_recall),
      'test_macro_f1[{:f}]'.format(test_macro_f1),
      time.strftime('%Y-%m-%d %H:%M:%S'))  # strftime defaults to the current local time

test_model[pt/sentiment_analysis_GRU_512_0.2_ReLU_100_35_128_50_0.001_0.8_pt]
dictionary[pt/dictionary.pt]
每个类别的精确率和召回率：
               precision    recall  f1-score   support

        非常满意       0.52      0.75      0.62       885
          满意       0.56      0.55      0.55      1247
          中立       0.25      0.43      0.32       268
         不满意       0.46      0.47      0.47      1150
       非常不满意       0.81      0.47      0.60      1320
        无效评论       0.19      0.20      0.19        41

    accuracy                           0.54      4911
   macro avg       0.47      0.48      0.46      4911
weighted avg       0.58      0.54      0.54      4911

test_loss[1.215363] test_macro_precision[0.467047] test_macro_recall[0.477122] test_macro_f1[0.457533] 2024-10-28 19:12:44


In [43]:
# 随机评论数据测试
import numpy
import torch
import model
import dataset
import dictionary
from init import config

# Run the saved sentiment model on a handful of hand-written reviews and print
# the predicted label id and emotion name for each one.

# Load the stopword list, trained model and vocabulary.
stopwords = dataset.load_stopwords(config.stopwords_path)
# NOTE: from here on `model` and `dictionary` name the loaded instances rather than
# the imported modules; the `import` statements at the top of this cell rebind the
# module names whenever the cell is re-run.
model = model.SentimentAnalysisModel().load(config.model_pt_path).to(config.device)
print('test_model[{:s}]'.format(config.model_pt_path))
dictionary = dictionary.SentimentAnalysisDictionary().load(config.dictionary_pt_path)
print('dictionary[{:s}]'.format(config.dictionary_pt_path))
# This cell loads its own model instance, so it must be switched to inference mode
# here; otherwise training-only layers such as dropout could stay active and make
# the predictions below non-deterministic.
model.eval()

contents = ["感觉这个秤非常小巧精致实用。自从看到家人买来这个秤以后，其他的包括秤（包括金器秤）都被闲置了",
            "第三次购买了，还算可以，希望能更耐用一些。",
            "没什想说的，中规中矩，没什么色差",
            "秤盘不平，一边高一边低。这个价位不应该这样了。按键硬，按起来费力。底座盘不平，有一个脚悬空",
            "底座四个点不平，质量不行，本来就是有精度要求，结果买一个底座不平的电子秤。简直是一个笑话",
            "f2@#$%#@@2r22r22s阿斯头孢哦90823089"]

# Preprocess: tokenize, keep only in-vocabulary words, truncate and pad to the
# model's sequence length, then map every word to its vocabulary id.
tokens = dataset.tokenization(contents, stopwords)
tokens = [[word for word in token if word in dictionary.word_id_dict] for token in tokens]
tokens_truncated = [token[:config.model_sequence_length] for token in tokens]
tokens_padded = [dataset.padding(token, config.model_sequence_length) for token in tokens_truncated]
# NOTE(review): the membership filter below also applies to whatever
# dataset.padding() added; if the padding token is not a key of word_id_dict it is
# dropped here and the sequence ends up shorter than model_sequence_length - confirm.
tokens_ids = [[dictionary.word_id_dict[word] for word in token if word in dictionary.word_id_dict] for
              token in tokens_padded]

# Pure inference: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    for i, token_id in enumerate(tokens_ids):
        # unsqueeze(0) adds a leading batch dimension of size 1 for the single review.
        data = torch.LongTensor(numpy.array(token_id)).unsqueeze(0).to(config.device)
        output = model(data)
        predicted = output.argmax(dim=1).cpu().numpy()
        print(contents[i], predicted[0], dataset.label2emotion.get(predicted[0]))

test_model[pt/sentiment_analysis_GRU_512_0.2_ReLU_100_35_128_50_0.001_0.8_pt]
dictionary[pt/dictionary.pt]
感觉这个秤非常小巧精致实用。自从看到家人买来这个秤以后，其他的包括秤（包括金器秤）都被闲置了 0 非常满意
第三次购买了，还算挺好的，希望能更耐用一些。 1 满意
没什想说的，中规中矩，没什么色差 2 中立
秤盘不平，一边高一边低。这个价位不应该这样了。按键硬，按起来费力。底座盘不平，有一个脚悬空 3 不满意
底座四个点不平，质量非常差，本来就是有精度要求，结果买一个底座不平的电子秤。简直是一个笑话 4 非常不满意
f2@#$%#@@2r22r22s阿斯头孢哦90823089 2 中立
