### **主題：啤酒評論評分預測 - 分類模型訓練結果測試**
### **說明：**
花了不少時間終於完成模型的訓練，接下來就剩下最後一個重要的步驟 - 驗證模型。<br />
這是一個相當重要的部分，如何從訓練結果的驗證模型的好壞，並進一步得知如何改善模型是實務中必經的一個過程。

### **題目：**
1. 建構出模型的預測框架，並且載入上次訓練完畢的模型
2. 以預測框架評估先前作業所分割出資料集的各項評分
3. 預測完成之後計算每出各項評分的準確度(accuracy)與判定係數(R square)

In [1]:
import os
import pandas as pd
from tqdm.notebook import tqdm
import sklearn.metrics as metrics

import torch
from torch import nn
from transformers import BertTokenizer, BertConfig, BertModel

import warnings
warnings.filterwarnings('ignore')

In [2]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
PRE_TRAINED_MODEL_CONFIG = BertConfig.from_pretrained(PRE_TRAINED_MODEL_NAME)
TOKENIZER = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
MAX_LEN = 256

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




In [3]:
class BeerRateClassifier(nn.Module):
    """
    啤酒評論評分分類模型主體
    Main model of beer sentiment for review sentiment analyzer
    """
    def __init__(self,
                 appearance_n_classes,
                 aroma_n_classes,
                 overall_n_classes,
                 palate_n_classes,
                 taste_n_classes):
        super(BeerRateClassifier, self).__init__()
        aspects = {
            'appearance': appearance_n_classes,
            'aroma': aroma_n_classes,
            'overall': overall_n_classes,
            'palate': palate_n_classes,
            'taste': taste_n_classes
        }

        self.bert = BertModel(PRE_TRAINED_MODEL_CONFIG)
        self.aspect_outs = nn.ModuleDict({
            aspect: nn.Linear(self.bert.config.hidden_size, n_classes)
            for aspect, n_classes in aspects.items()  
        })
        
    def load_model(self, path):
        """
        載入先前訓練好的權重檔
        """
        self.load_state_dict(torch.load(path, map_location=DEVICE))

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        out = outputs.pooler_output
        aspect_outputs = {
            aspect: aspect_out(out)
            for aspect, aspect_out in self.aspect_outs.items()
        }

        return aspect_outputs

    def predicts(self, text):
        """
        將 input text (啤酒評論)輸入模型，輸出預測評分
        make prediction according to the text with the given model
        """
        encoding = TOKENIZER.encode_plus(
            text,
            max_length=MAX_LEN,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        aspect_outputs = self.forward(
            input_ids=encoding['input_ids'],
            attention_mask=encoding['attention_mask']
        )
        preds = {
            aspect: torch.max(output, dim=1)[1].item()
            for aspect, output in aspect_outputs.items()
        }

        return preds

In [4]:
TEST = pd.read_json(os.path.join('data', 'test_set.json'), encoding='utf-8')

In [5]:
# 分類模型
MODEL = BeerRateClassifier(4, 4, 4, 4, 4)
MODEL.load_model(os.path.join('model', 'best_model_state.bin'))

In [6]:
data = []
for _, row in tqdm(TEST.iterrows(), total=TEST.shape[0]):
    if row['review/text'] is not None:
        text = row['review/text'].replace('<br />', ' ')
    else:
        text = ''
    predict = MODEL.predicts(text)
    data.append(predict)

HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))




In [7]:
aspects = list(data[0].keys())
for aspect in aspects:
    preds = [d[aspect] for d in data]
    labels = [row[f"review_{aspect}"] for _, row in TEST.iterrows()]

    print(f"---{aspect}---")
    print(f"Accuracy: {metrics.accuracy_score(labels, preds)}")
    print(f"R2 Score: {metrics.r2_score(labels, preds)}")

---appearance---
Accuracy: 0.5408
R2 Score: 0.0
---aroma---
Accuracy: 0.5266
R2 Score: 0.0
---overall---
Accuracy: 0.51
R2 Score: 0.0
---palate---
Accuracy: 0.4718
R2 Score: 0.0
---taste---
Accuracy: 0.5584
R2 Score: 0.0
