### **主題：啤酒評論評分預測 - 回歸模型建構**
### **說明：**
完成先前的課程之後，了解到有些問題其實是比較適合回歸模型，比方說評分預測的問題。<br />
因此這次以回歸模型，重新評估其各項屬性(appearance, aroma, overall, palate, taste)。<br />
另外，啤酒評論問題需要同時預測多個評分，屬於多目標回歸問題(multi-target regression，也稱 multi-output regression；「multi-label」一詞通常用於分類問題)，也必須特別注意。
### **題目：**
1. 參照前次分類模型的結構，將分類模型修改為回歸模型(完成底下的 BeerRateRegressor)
2. 修改完後，進行模型訓練，確認所修改的架構不會產生錯誤。
#### **提示1: 若因 GPU 限制無法快速訓練，可以考慮調低訓練回合數，MAX_LEN，或選擇較小的 BERT 模型。**

In [1]:
import os
import numpy as np
import pandas as pd

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    BertModel,
    BertTokenizer,
    AdamW,
    get_linear_schedule_with_warmup
)

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Name of the pretrained checkpoint; used for the tokenizer below and for the
# BERT encoder inside the regressor.
PRE_TRAINED_MODEL_NAME = 'bert-base-cased'
# Number of reviews per batch handed to the DataLoader.
BATCH_SIZE = 16
# Token length every review is truncated / padded to by the tokenizer.
MAX_LEN = 256
# Number of full passes over the training data.
EPOCHS = 10

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Shared tokenizer instance; loading it may download the vocabulary files.
TOKENIZER = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




In [3]:
class BeerDataset(Dataset):
    """
    Torch ``Dataset`` over beer reviews for multi-aspect rating regression.

    Each item pairs one tokenized review text with its five aspect ratings
    (appearance, aroma, overall, palate, taste).

    Args:
        comments: Sequence of review texts.
        appearance_target: Sequence of appearance ratings.
        aroma_target: Sequence of aroma ratings.
        overall_target: Sequence of overall ratings.
        palate_target: Sequence of palate ratings.
        taste_target: Sequence of taste ratings.
        max_len: Token length every review is truncated / padded to.

    Inputs that expose ``to_numpy`` (e.g. pandas Series) are converted to
    NumPy arrays on construction, so items are always looked up by position.
    Indexing a Series with ``series[i]`` is label-based and fails (or returns
    the wrong row) when the index is not exactly ``0..n-1``.
    """
    def __init__(self,
                 comments,
                 appearance_target,
                 aroma_target,
                 overall_target,
                 palate_target,
                 taste_target,
                 max_len):
        self.comments = self._positional(comments)
        self.appearance_target = self._positional(appearance_target)
        self.aroma_target = self._positional(aroma_target)
        self.overall_target = self._positional(overall_target)
        self.palate_target = self._positional(palate_target)
        self.taste_target = self._positional(taste_target)
        self.max_len = max_len

    @staticmethod
    def _positional(values):
        """Return ``values`` in a form that is indexed by position, not label."""
        to_numpy = getattr(values, 'to_numpy', None)
        return to_numpy() if callable(to_numpy) else values

    @staticmethod
    def _as_target(value):
        """Wrap one rating as a float32 tensor of shape ``(1,)``."""
        return torch.tensor([value], dtype=torch.float32)

    def __len__(self):
        """Return the number of reviews."""
        return len(self.comments)

    def __getitem__(self, item):
        """
        Return the ``item``-th review (by position) as a dict with the raw
        text, the flattened ``input_ids`` / ``attention_mask`` tensors and one
        ``<aspect>_target`` tensor of shape ``(1,)`` per aspect.
        """
        comment = str(self.comments[item])
        encoding = TOKENIZER.encode_plus(
            comment,
            max_length=self.max_len,
            truncation=True,
            add_special_tokens=True,
            return_token_type_ids=False,
            # `pad_to_max_length=True` is deprecated; this is its replacement.
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'comment': comment,
            # The tokenizer returns (1, max_len) tensors; drop the batch axis
            # so the DataLoader can stack items itself.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'appearance_target': self._as_target(self.appearance_target[item]),
            'aroma_target': self._as_target(self.aroma_target[item]),
            'overall_target': self._as_target(self.overall_target[item]),
            'palate_target': self._as_target(self.palate_target[item]),
            'taste_target': self._as_target(self.taste_target[item])
        }

In [4]:
def create_data_loader(dataframe, max_len, batch_size):
    """
    Wrap the reviews in ``dataframe`` in a ``BeerDataset`` and return a
    ``DataLoader`` over it.

    Args:
        dataframe: Table holding a ``'review/text'`` column and one
            ``review_<aspect>`` attribute/column per rating aspect.
        max_len: Token length passed through to the dataset.
        batch_size: Number of items per batch.

    Returns:
        A ``DataLoader`` built with the given batch size and otherwise
        default settings.
    """
    texts = dataframe['review/text']
    # One keyword argument per aspect: "<aspect>_target" <- review_<aspect>.
    rating_columns = {
        f"{aspect}_target": getattr(dataframe, f"review_{aspect}")
        for aspect in ('appearance', 'aroma', 'overall', 'palate', 'taste')
    }
    reviews = BeerDataset(comments=texts, max_len=max_len, **rating_columns)

    return DataLoader(reviews, batch_size=batch_size)

In [5]:
class BeerRateRegressor(nn.Module):
    """
    Beer review rating regressor.

    A pretrained BERT encoder is followed by dropout and one single-output
    linear head per aspect (appearance, aroma, overall, palate, taste).
    """
    def __init__(self):
        super().__init__()

        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        hidden_size = self.bert.config.hidden_size

        # One independent regression head per aspect, keyed by aspect name.
        heads = nn.ModuleDict()
        for name in ('appearance', 'aroma', 'overall', 'palate', 'taste'):
            heads[name] = nn.Linear(hidden_size, 1)
        self.aspect_outs = heads
        self.drop = nn.Dropout(0.2)

    def forward(self, input_ids, attention_mask):
        """
        Encode the batch with BERT and return a dict mapping each aspect name
        to the output of that aspect's linear head.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled = self.drop(encoded.pooler_output)

        predictions = {}
        for name, head in self.aspect_outs.items():
            predictions[name] = head(pooled)
        return predictions

In [6]:
def train_epoch(model,
                data_loader,
                loss_fn,
                optimizer,
                scheduler,
                n_examples):
    """
    Train the regressor for one pass over ``data_loader``.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            that returns a dict mapping aspect name to a prediction tensor.
        data_loader: Iterable of batches. Each batch is a dict holding
            ``'input_ids'``, ``'attention_mask'`` and one
            ``'<aspect>_target'`` tensor per aspect the model predicts.
        loss_fn: Callable ``(prediction, target) -> scalar loss tensor``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch.
        n_examples: Unused; kept so existing callers keep working.

    Returns:
        Mean over batches of the summed per-aspect loss, or NaN when
        ``data_loader`` yields no batches.
    """
    model = model.train()

    # Send inputs to wherever the model's weights live instead of relying on
    # a module-level device constant.
    first_param = next(model.parameters(), None)
    device = (first_param.device if first_param is not None
              else torch.device('cpu'))

    losses = []
    for batch in data_loader:
        # Clear gradients first so nothing left over from before this call
        # (or from a previous step) leaks into the update.
        optimizer.zero_grad()

        outputs = model(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device)
        )
        aspect_losses = [
            loss_fn(prediction, batch[f"{aspect}_target"].to(device))
            for aspect, prediction in outputs.items()
        ]

        # All aspects are trained jointly: the batch loss is their plain sum.
        loss = torch.stack(aspect_losses).sum()
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    return np.mean(losses) if losses else float('nan')

In [7]:
def eval_model(model,
               data_loader,
               loss_fn,
               n_examples):
    """
    Evaluate the regressor on ``data_loader`` without updating its weights.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            that returns a dict mapping aspect name to a prediction tensor.
        data_loader: Iterable of batches. Each batch is a dict holding
            ``'input_ids'``, ``'attention_mask'`` and one
            ``'<aspect>_target'`` tensor per aspect the model predicts.
        loss_fn: Callable ``(prediction, target) -> scalar loss tensor``.
        n_examples: Unused; kept so existing callers keep working.

    Returns:
        Mean over batches of the summed per-aspect loss, or NaN when
        ``data_loader`` yields no batches.
    """
    model = model.eval()

    # Send inputs to wherever the model's weights live instead of relying on
    # a module-level device constant.
    first_param = next(model.parameters(), None)
    device = (first_param.device if first_param is not None
              else torch.device('cpu'))

    losses = []
    with torch.no_grad():
        for batch in data_loader:
            outputs = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device)
            )
            aspect_losses = [
                loss_fn(prediction, batch[f"{aspect}_target"].to(device))
                for aspect, prediction in outputs.items()
            ]

            # Same aggregation as in training: sum of the per-aspect losses.
            loss = torch.stack(aspect_losses).sum()
            losses.append(loss.item())

    return np.mean(losses) if losses else float('nan')

In [8]:
def _load_shuffled(file_name):
    """Read a JSON file from the ``data`` directory and return its rows in
    random order with a fresh 0..n-1 index."""
    frame = pd.read_json(os.path.join('data', file_name), encoding='utf-8')
    return frame.sample(frac=1).reset_index(drop=True)


# The validation split is read from the test-set file.
TRAIN = _load_shuffled('train_set.json')
VAL = _load_shuffled('test_set.json')

In [9]:
# Build the regressor and move its weights to the selected device.
MODEL = BeerRateRegressor()
MODEL.to(DEVICE)

# Both splits use the same sequence length and batch size.
TRAIN_DATA_LOADER, VAL_DATA_LOADER = (
    create_data_loader(frame, MAX_LEN, BATCH_SIZE)
    for frame in (TRAIN, VAL)
)

OPTIMIZER = AdamW(MODEL.parameters(), lr=2e-5, correct_bias=False)

# The scheduler is stepped once per batch, so the schedule spans every batch
# of every epoch; the first tenth of those steps is warm-up.
TOTAL_STEPS = EPOCHS * len(TRAIN_DATA_LOADER)
SCHEDULER = get_linear_schedule_with_warmup(
    OPTIMIZER,
    num_warmup_steps=TOTAL_STEPS // 10,
    num_training_steps=TOTAL_STEPS
)

# Mean squared error, applied per aspect by the train / eval loops.
LOSS_FN = nn.MSELoss()
LOSS_FN.to(DEVICE)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




In [10]:
# Lowest validation loss seen so far; a checkpoint is written each time it
# improves.
BEST_LOSS = np.inf

for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}", '-' * 10, sep='\n')

    train_loss = train_epoch(
        model=MODEL,
        data_loader=TRAIN_DATA_LOADER,
        loss_fn=LOSS_FN,
        optimizer=OPTIMIZER,
        scheduler=SCHEDULER,
        n_examples=len(TRAIN)
    )
    print(f"Train loss {train_loss}")

    val_loss = eval_model(
        model=MODEL,
        data_loader=VAL_DATA_LOADER,
        loss_fn=LOSS_FN,
        n_examples=len(VAL)
    )
    print(f"Val   loss {val_loss}", end='\n\n')

    # Written as "not <" (rather than ">=") so a NaN loss never counts as an
    # improvement.
    if not val_loss < BEST_LOSS:
        continue

    os.makedirs('model', exist_ok=True)
    torch.save(MODEL.state_dict(), os.path.join('model', 'best_model_state.bin'))
    BEST_LOSS = val_loss

Epoch 1/10
----------
Train loss 3.5730655888751697
Val   loss 4.522369164628342

Epoch 2/10
----------
Train loss 2.482728103281042
Val   loss 4.752180860446284

Epoch 3/10
----------
Train loss 2.178296364014826
Val   loss 4.954977214526825

Epoch 4/10
----------
Train loss 1.8688349742886015
Val   loss 5.6202635399449745

Epoch 5/10
----------
Train loss 1.6103480685193725
Val   loss 5.6453618805248515

Epoch 6/10
----------
Train loss 1.4034408241923577
Val   loss 5.991086634964988

Epoch 7/10
----------
Train loss 1.2470992712990652
Val   loss 6.016986700673454

Epoch 8/10
----------
Train loss 1.1296801284204545
Val   loss 5.768154578467908

Epoch 9/10
----------
Train loss 1.0422642206514259
Val   loss 5.728507234265629

Epoch 10/10
----------
Train loss 0.9902885927981937
Val   loss 5.968100348600564

