# **目標**:
### 使用 BERT-chinese-base 訓練 Taipei_FAQ 分類器


# 安裝套件

In [1]:
pip install transformers datasets accelerate

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 34.4 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.1.0-py3-none-any.whl (325 kB)
[K     |████████████████████████████████| 325 kB 63.7 MB/s 
[?25hCollecting accelerate
  Downloading accelerate-0.6.2-py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 4.5 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 56.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 49.2 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 54.8 MB/s 
[?

# 確認 GPU 分配

In [2]:
!nvidia-smi

Wed Apr 27 13:24:50 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Mount 雲端硬碟

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


## cd 到自己的雲端硬碟中的colab

In [5]:
%cd /content/drive/Shareddrives/NLP_LAB_S7/晨華偷用中/"Taipei_FAQ"

/content/drive/Shareddrives/NLP_LAB_S7/晨華偷用中/Taipei_FAQ


In [6]:
!ls

'BERT for Taipei_FAQ_classifier.ipynb'


# 資料前處理

In [7]:
import os
from torch.utils import data
import torch
import csv
from sklearn.preprocessing import LabelEncoder
import pickle
from pathlib import Path

In [24]:
### 將資料中的 taget 做 LabelEncoder，並將資料分別收集成 list ###
###  ex: questions=[問句1, 問句2,...]，tagets=[t1, t2,....]  ###

def read_data(path):
  path = Path(path)
  with open(path, 'r') as f:
    data = csv.reader(f,delimiter=',',quotechar='|')
    questions = []
    tagets = []
    
    for ele in data: 
      questions.append(ele[0]) 
      tagets.append(ele[1])

  return questions, tagets


In [30]:
train_questions, train_targets = read_data('/content/drive/Shareddrives/新手村/教學投影片/新生訓練/data/Taipei_FAQ/Taipei_FAQ_classifier_training.csv')
eval_questions, eval_targets = read_data('/content/drive/Shareddrives/新手村/教學投影片/新生訓練/data/Taipei_FAQ/Taipei_FAQ_classifier_testing.csv')

In [74]:
eval_questions

['臺北市專為高齡者開辦的課程或活動有哪些?可否有網站資料直接查詢?',
 '如何查詢藝文推廣處城市舞台檔期?',
 '個案若安置於機構，是否能使用失能身心障礙日間照顧中心服務?',
 '申請低收入戶18歲以上就學生活補助洽辦單位、應備文件、補助資格及補助內容?',
 '如何查詢訴願案件辦理進度?',
 '騎車時，若遇到車輛爆胎等故障情形，該怎麼辦?',
 '有關私立高中學雜費補助，孩子與爺爺同住，父母和孩子不同戶籍，請問這樣就不能申請私立高中學雜費補助?',
 '什麼是茲卡病毒感染症?',
 '請問如何成為衛生保健志工?',
 '公務人員能否擔任公司股東或公司之董事或監察人',
 '重型機車(紅牌、黃牌)可以停放汽車停車格嗎?',
 '106年度臺北市各有線電視業者收視費用是多少?',
 '是否有暑期工讀機會?每年何時辦理?',
 '本處稽查人員執法不公、選擇性開單?',
 '本人或家人持有身心障礙手冊或證明，如何辦理使用牌照稅免稅優惠?',
 '契稅如何計算?',
 '●軍人權益：「收到教育召集令如何請假?」',
 '什麼是新一代學生悠遊卡?',
 '是否可以自行製作環保兩用袋?',
 '小型車後座繫安全帶法令介紹',
 '性騷擾事件受害人可以找誰申訴?',
 '勞動檢查程序為何?',
 '請問海葬實施程序為何?',
 '只要2歲以下兒童符合申請資格，就有加碼的友善托育補助嗎?',
 '設立護理機構（護理之家、居家護理），該準備哪些文件?',
 '我剛從茲卡病毒流行地區回來，哪裡有茲卡病毒的快速診斷服務?',
 '溫泉可接用量如何計算?',
 '地籍謄本如何申請?有哪些申請管道?可以通訊申請或傳真申請嗎?傳真號碼為何?費用如何計收?',
 '如果感染了登革熱該怎麼辦?',
 '特殊境遇家庭扶助之扶助對象中，何謂「祖父母扶養18歲以下父母無力扶養之孫子女」?',
 '公司何時可以開始申請創業補助?',
 '建物存記或容積獎勵許可事項會在土地登記謄本載明嗎?',
 '林語堂故居營業時間?是否須收取門票?聯絡電話?交通資訊?',
 '攤販、夜市是否為限塑政策的限制對象?',
 '麻疹如何傳染?',
 '身心障礙者是小孩子無法開車，不能申請身心障礙者專用停車位識別證，可否銷單?',
 '在臺北市職能發展學院受訓，有幫我們保勞、健保嗎?',
 '企業經營者未提供契約審閱期

In [42]:
print("training data 筆數：", len(train_questions))
print("testing data 筆數：", len(eval_questions))
print("question：", train_questions[0])
print("target：", train_targets[0])

with open('label_encoder.pkl', 'rb') as reader:
  le = pickle.load(reader)
  # for i in range(len(train_targets)):
  #   print(i)
  #   print(train_targets[i])
  print("targets name：", le.transform([train_targets[0]]))

training data 筆數： 5397
testing data 筆數： 1350
question： 兵役-延期入營問答
target： 臺北市內湖區公所
targets name： [3]


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


# 將資料進行 Tokenize
## 將 input 資料轉換成 token id 、tpye_id 與 attention_mask

In [43]:
from transformers import BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained('bert-base-chinese')

train_encodings = tokenizer(train_questions, truncation=True, padding=True)
eval_encodings = tokenizer(eval_questions, truncation=True, padding=True)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/107k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/263k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/624 [00:00<?, ?B/s]

## 檢查轉換是否正確

In [44]:
train_encodings.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])

In [45]:
print(train_encodings['input_ids'][0])
print(tokenizer.decode(train_encodings['input_ids'][0]))

print(train_encodings['token_type_ids'][0])
print(train_encodings['attention_mask'][0])

[101, 1070, 2514, 118, 2454, 3309, 1057, 4245, 1558, 5031, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[CLS] 兵 役 - 延 期 入 營 問 答 [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] 

## 加入 Label

In [54]:
def add_targets(encodings, targets):
  encodings.update({'label': le.transform(targets)})

add_targets(train_encodings, train_targets)
add_targets(eval_encodings, eval_targets)

In [55]:
train_encodings["label"]

array([ 3, 38, 73, ..., 47, 73, 29])

In [56]:
print(train_encodings['label'][0])

3


# 定義 Dataset，並轉換成 tensor 格式

In [57]:
class Dataset(torch.utils.data.Dataset):
  def __init__(self, encodings):
    self.encodings = encodings

  def __getitem__(self, idx):
    return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}

  def __len__(self):
    return len(self.encodings.input_ids)

train_dataset = Dataset(train_encodings)
eval_dataset = Dataset(eval_encodings)

In [58]:
train_dataset[0]

{'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'input_ids': tensor([ 101, 1070, 2514,  118, 2454, 3309, 1057, 4245, 1558, 5031,  102,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
            0,    0,  

# 載入模型架構( SequenceClassification )

In [59]:
from transformers import BertConfig, BertForSequenceClassification
config = BertConfig.from_pretrained('bert-base-chinese', num_labels=78)  #num_labels 設定類別數
model = BertForSequenceClassification.from_pretrained('bert-base-chinese', config=config)

Downloading:   0%|          | 0.00/393M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

## 查看模型架構

In [60]:
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(21128, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

# 訓練模型

In [61]:
import logging
import datasets
from datasets import load_dataset, load_metric
from torch.utils.data import DataLoader
from tqdm.auto import tqdm, trange
import math

import transformers
from accelerate import Accelerator
from transformers import (
    AdamW,
    AutoConfig,
    default_data_collator,
    get_scheduler
)

## 設定 epoch 與 batch size

In [63]:
train_batch_size = 5      # 設定 training batch size
eval_batch_size = 10      # 設定 eval batch size
num_train_epochs = 8      # 設定 epoch

## 將資料丟入 DataLoader


In [64]:
data_collator = default_data_collator
train_dataloader = DataLoader(train_dataset, shuffle=True, collate_fn=data_collator, batch_size=train_batch_size)
eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=eval_batch_size)

## Optimizer 、Learning rate 、Scheduler 設定

In [65]:
learning_rate=3e-5          # 設定 learning_rate
gradient_accumulation_steps = 1   # 設定 幾步後進行反向傳播

no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },                                
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)

# Scheduler and math around the number of training steps.
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps)
max_train_steps = num_train_epochs * num_update_steps_per_epoch
print('max_train_steps', max_train_steps)

# scheduler
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=max_train_steps,
)

max_train_steps 8640




## 將資料、參數丟入 Accelerator



In [66]:
# Initialize the accelerator. We will let the accelerator handle device placement for us in this example.
accelerator = Accelerator()

# Prepare everything with our `accelerator`.
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader
)

## 設定 metric 方法

In [67]:
# Get the metric function

metric = load_metric("accuracy")

Downloading builder script:   0%|          | 0.00/1.41k [00:00<?, ?B/s]

## 開始訓練

In [69]:
# Train!
logger = logging.getLogger(__name__)
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger.info(accelerator.state)
output_dir = 'Taipei_FAQ_Classifier/'


total_batch_size = train_batch_size * accelerator.num_processes * gradient_accumulation_steps

logger.info("***** Running training *****")
logger.info(f"  Num examples = {len(train_dataset)}")
logger.info(f"  Num Epochs = {num_train_epochs}")
logger.info(f"  Instantaneous batch size per device = {train_batch_size}")
logger.info(f"  Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}")
logger.info(f"  Gradient Accumulation steps = {gradient_accumulation_steps}")
logger.info(f"  Total optimization steps = {max_train_steps}")


completed_steps = 0
best_epoch = {"epoch:": 0, "acc": 0 }

for epoch in trange(num_train_epochs, desc="Epoch"):
  model.train()
  for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
    outputs = model(**batch)
    loss = outputs.loss
    loss = loss / gradient_accumulation_steps
    accelerator.backward(loss)
    if step % gradient_accumulation_steps == 0 or step == len(train_dataloader) - 1:
      optimizer.step()
      lr_scheduler.step()
      optimizer.zero_grad()
      completed_steps += 1

    if step % 50 == 0:
      print({'epoch': epoch, 'step': step, 'loss': loss.item()})

    if completed_steps >= max_train_steps:
      break
      
  logger.info("***** Running eval *****")
  model.eval()
  for step, batch in enumerate(tqdm(eval_dataloader, desc="Eval Iteration")):
    outputs = model(**batch)
    predictions = outputs.logits.argmax(dim=-1)
    metric.add_batch(
        predictions=accelerator.gather(predictions),
        references=accelerator.gather(batch["labels"]),
    )

  eval_metric = metric.compute()
  logger.info(f"epoch {epoch}: {eval_metric}")
  # if eval_metric > best_epoch['acc']:
  #   best_epoch['epoch'] = num_train_epochs
  #   best_epoch['acc'] = eval_metric


  if output_dir is not None:
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(output_dir + 'epoch_' + str(num_train_epochs) + '/', save_function=accelerator.save)


04/27/2022 13:58:36 - INFO - __main__ - Distributed environment: NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda
Mixed precision type: no

04/27/2022 13:58:36 - INFO - __main__ - ***** Running training *****
04/27/2022 13:58:36 - INFO - __main__ -   Num examples = 5397
04/27/2022 13:58:36 - INFO - __main__ -   Num Epochs = 8
04/27/2022 13:58:36 - INFO - __main__ -   Instantaneous batch size per device = 5
04/27/2022 13:58:36 - INFO - __main__ -   Total train batch size (w. parallel, distributed & accumulation) = 5
04/27/2022 13:58:36 - INFO - __main__ -   Gradient Accumulation steps = 1
04/27/2022 13:58:36 - INFO - __main__ -   Total optimization steps = 8640


Epoch:   0%|          | 0/8 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 0, 'step': 0, 'loss': 1.2676838636398315}
{'epoch': 0, 'step': 50, 'loss': 1.1291825771331787}
{'epoch': 0, 'step': 100, 'loss': 1.8198169469833374}
{'epoch': 0, 'step': 150, 'loss': 0.7175605297088623}
{'epoch': 0, 'step': 200, 'loss': 1.2267824411392212}
{'epoch': 0, 'step': 250, 'loss': 1.679508924484253}
{'epoch': 0, 'step': 300, 'loss': 0.5628188848495483}
{'epoch': 0, 'step': 350, 'loss': 1.0908210277557373}
{'epoch': 0, 'step': 400, 'loss': 1.0842207670211792}
{'epoch': 0, 'step': 450, 'loss': 0.3686910569667816}
{'epoch': 0, 'step': 500, 'loss': 1.8252525329589844}
{'epoch': 0, 'step': 550, 'loss': 0.3850345015525818}
{'epoch': 0, 'step': 600, 'loss': 0.8951374888420105}
{'epoch': 0, 'step': 650, 'loss': 1.1300337314605713}
{'epoch': 0, 'step': 700, 'loss': 1.4441601037979126}
{'epoch': 0, 'step': 750, 'loss': 0.8455973863601685}
{'epoch': 0, 'step': 800, 'loss': 1.0864527225494385}
{'epoch': 0, 'step': 850, 'loss': 0.953093409538269}
{'epoch': 0, 'step': 900, 'loss':

04/27/2022 14:01:37 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:01:47 - INFO - __main__ - epoch 0: {'accuracy': 0.78}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 1, 'step': 0, 'loss': 1.2963584661483765}
{'epoch': 1, 'step': 50, 'loss': 0.5048292279243469}
{'epoch': 1, 'step': 100, 'loss': 0.5499793887138367}
{'epoch': 1, 'step': 150, 'loss': 0.27320748567581177}
{'epoch': 1, 'step': 200, 'loss': 0.6091972589492798}
{'epoch': 1, 'step': 250, 'loss': 0.2798631191253662}
{'epoch': 1, 'step': 300, 'loss': 0.39633920788764954}
{'epoch': 1, 'step': 350, 'loss': 0.3914615511894226}
{'epoch': 1, 'step': 400, 'loss': 0.367374062538147}
{'epoch': 1, 'step': 450, 'loss': 0.11620035022497177}
{'epoch': 1, 'step': 500, 'loss': 0.5586121678352356}
{'epoch': 1, 'step': 550, 'loss': 0.4316614270210266}
{'epoch': 1, 'step': 600, 'loss': 0.20364448428153992}
{'epoch': 1, 'step': 650, 'loss': 0.3181617856025696}
{'epoch': 1, 'step': 700, 'loss': 0.3200454115867615}
{'epoch': 1, 'step': 750, 'loss': 0.3189034163951874}
{'epoch': 1, 'step': 800, 'loss': 0.2739177644252777}
{'epoch': 1, 'step': 850, 'loss': 0.26604050397872925}
{'epoch': 1, 'step': 900, '

04/27/2022 14:04:49 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:04:59 - INFO - __main__ - epoch 1: {'accuracy': 0.8133333333333334}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 2, 'step': 0, 'loss': 1.3095347881317139}
{'epoch': 2, 'step': 50, 'loss': 0.09690184891223907}
{'epoch': 2, 'step': 100, 'loss': 0.641072154045105}
{'epoch': 2, 'step': 150, 'loss': 0.7719433903694153}
{'epoch': 2, 'step': 200, 'loss': 0.2721879184246063}
{'epoch': 2, 'step': 250, 'loss': 0.09484794735908508}
{'epoch': 2, 'step': 300, 'loss': 0.3747842311859131}
{'epoch': 2, 'step': 350, 'loss': 0.07854817062616348}
{'epoch': 2, 'step': 400, 'loss': 0.05075722187757492}
{'epoch': 2, 'step': 450, 'loss': 0.05733947828412056}
{'epoch': 2, 'step': 500, 'loss': 0.42602795362472534}
{'epoch': 2, 'step': 550, 'loss': 0.42115211486816406}
{'epoch': 2, 'step': 600, 'loss': 0.07086185365915298}
{'epoch': 2, 'step': 650, 'loss': 0.5532737970352173}
{'epoch': 2, 'step': 700, 'loss': 0.13888932764530182}
{'epoch': 2, 'step': 750, 'loss': 0.2484530508518219}
{'epoch': 2, 'step': 800, 'loss': 0.3188069462776184}
{'epoch': 2, 'step': 850, 'loss': 0.18146724998950958}
{'epoch': 2, 'step': 9

04/27/2022 14:08:01 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:08:12 - INFO - __main__ - epoch 2: {'accuracy': 0.8244444444444444}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 3, 'step': 0, 'loss': 0.14316771924495697}
{'epoch': 3, 'step': 50, 'loss': 0.050348199903964996}
{'epoch': 3, 'step': 100, 'loss': 0.048668909817934036}
{'epoch': 3, 'step': 150, 'loss': 0.7275693416595459}
{'epoch': 3, 'step': 200, 'loss': 0.053569186478853226}
{'epoch': 3, 'step': 250, 'loss': 0.057621538639068604}
{'epoch': 3, 'step': 300, 'loss': 0.0996178686618805}
{'epoch': 3, 'step': 350, 'loss': 0.04545246437191963}
{'epoch': 3, 'step': 400, 'loss': 0.043049197643995285}
{'epoch': 3, 'step': 450, 'loss': 0.0652327686548233}
{'epoch': 3, 'step': 500, 'loss': 0.10373030602931976}
{'epoch': 3, 'step': 550, 'loss': 0.09232877939939499}
{'epoch': 3, 'step': 600, 'loss': 0.07829618453979492}
{'epoch': 3, 'step': 650, 'loss': 0.2706189453601837}
{'epoch': 3, 'step': 700, 'loss': 0.0788913369178772}
{'epoch': 3, 'step': 750, 'loss': 0.10921188443899155}
{'epoch': 3, 'step': 800, 'loss': 0.13489100337028503}
{'epoch': 3, 'step': 850, 'loss': 0.619379997253418}
{'epoch': 3, 's

04/27/2022 14:11:14 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:11:25 - INFO - __main__ - epoch 3: {'accuracy': 0.8274074074074074}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 4, 'step': 0, 'loss': 0.02508435770869255}
{'epoch': 4, 'step': 50, 'loss': 0.027022430673241615}
{'epoch': 4, 'step': 100, 'loss': 0.16365087032318115}
{'epoch': 4, 'step': 150, 'loss': 0.05242840200662613}
{'epoch': 4, 'step': 200, 'loss': 0.10595953464508057}
{'epoch': 4, 'step': 250, 'loss': 0.10909620672464371}
{'epoch': 4, 'step': 300, 'loss': 0.10521425306797028}
{'epoch': 4, 'step': 350, 'loss': 0.0378769151866436}
{'epoch': 4, 'step': 400, 'loss': 0.276603639125824}
{'epoch': 4, 'step': 450, 'loss': 0.05124928429722786}
{'epoch': 4, 'step': 500, 'loss': 0.016858112066984177}
{'epoch': 4, 'step': 550, 'loss': 0.23986582458019257}
{'epoch': 4, 'step': 600, 'loss': 0.046266160905361176}
{'epoch': 4, 'step': 650, 'loss': 0.0533570721745491}
{'epoch': 4, 'step': 700, 'loss': 0.046732138842344284}
{'epoch': 4, 'step': 750, 'loss': 0.04398156702518463}
{'epoch': 4, 'step': 800, 'loss': 0.058935295790433884}
{'epoch': 4, 'step': 850, 'loss': 0.0581619068980217}
{'epoch': 4, 

04/27/2022 14:14:26 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:14:37 - INFO - __main__ - epoch 4: {'accuracy': 0.84}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 5, 'step': 0, 'loss': 0.04561994969844818}
{'epoch': 5, 'step': 50, 'loss': 0.036887429654598236}
{'epoch': 5, 'step': 100, 'loss': 0.06599406152963638}
{'epoch': 5, 'step': 150, 'loss': 0.028270507231354713}
{'epoch': 5, 'step': 200, 'loss': 0.026975926011800766}
{'epoch': 5, 'step': 250, 'loss': 0.055272769182920456}
{'epoch': 5, 'step': 300, 'loss': 0.04241779446601868}
{'epoch': 5, 'step': 350, 'loss': 0.04577174037694931}
{'epoch': 5, 'step': 400, 'loss': 0.026737520471215248}
{'epoch': 5, 'step': 450, 'loss': 0.03624686226248741}
{'epoch': 5, 'step': 500, 'loss': 0.05547149106860161}
{'epoch': 5, 'step': 550, 'loss': 0.03737349063158035}
{'epoch': 5, 'step': 600, 'loss': 0.05670350790023804}
{'epoch': 5, 'step': 650, 'loss': 0.09398209303617477}
{'epoch': 5, 'step': 700, 'loss': 0.03746500611305237}
{'epoch': 5, 'step': 750, 'loss': 0.03499027341604233}
{'epoch': 5, 'step': 800, 'loss': 0.17775958776474}
{'epoch': 5, 'step': 850, 'loss': 0.07257789373397827}
{'epoch': 5

04/27/2022 14:17:39 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:17:49 - INFO - __main__ - epoch 5: {'accuracy': 0.8392592592592593}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 6, 'step': 0, 'loss': 0.033243097364902496}
{'epoch': 6, 'step': 50, 'loss': 0.03020099736750126}
{'epoch': 6, 'step': 100, 'loss': 0.04147477447986603}
{'epoch': 6, 'step': 150, 'loss': 0.043028414249420166}
{'epoch': 6, 'step': 200, 'loss': 0.01741836965084076}
{'epoch': 6, 'step': 250, 'loss': 0.05589468404650688}
{'epoch': 6, 'step': 300, 'loss': 0.05696014314889908}
{'epoch': 6, 'step': 350, 'loss': 0.01588406041264534}
{'epoch': 6, 'step': 400, 'loss': 0.019545085728168488}
{'epoch': 6, 'step': 450, 'loss': 0.02581125870347023}
{'epoch': 6, 'step': 500, 'loss': 0.07731140404939651}
{'epoch': 6, 'step': 550, 'loss': 0.024320680648088455}
{'epoch': 6, 'step': 600, 'loss': 0.018725620582699776}
{'epoch': 6, 'step': 650, 'loss': 0.042141713201999664}
{'epoch': 6, 'step': 700, 'loss': 0.025807300582528114}
{'epoch': 6, 'step': 750, 'loss': 0.037033818662166595}
{'epoch': 6, 'step': 800, 'loss': 0.025554532185196877}
{'epoch': 6, 'step': 850, 'loss': 0.022609539330005646}
{'e

04/27/2022 14:20:51 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:21:02 - INFO - __main__ - epoch 6: {'accuracy': 0.8429629629629629}


Iteration:   0%|          | 0/1080 [00:00<?, ?it/s]

{'epoch': 7, 'step': 0, 'loss': 0.02530512586236}
{'epoch': 7, 'step': 50, 'loss': 0.03471960872411728}
{'epoch': 7, 'step': 100, 'loss': 0.03316183015704155}
{'epoch': 7, 'step': 150, 'loss': 0.030439119786024094}
{'epoch': 7, 'step': 200, 'loss': 0.020504822954535484}
{'epoch': 7, 'step': 250, 'loss': 0.017067860811948776}
{'epoch': 7, 'step': 300, 'loss': 0.020822852849960327}
{'epoch': 7, 'step': 350, 'loss': 0.028088515624403954}
{'epoch': 7, 'step': 400, 'loss': 0.031124060973525047}
{'epoch': 7, 'step': 450, 'loss': 0.01632787473499775}
{'epoch': 7, 'step': 500, 'loss': 0.04949803277850151}
{'epoch': 7, 'step': 550, 'loss': 0.010876971296966076}
{'epoch': 7, 'step': 600, 'loss': 0.09000562876462936}
{'epoch': 7, 'step': 650, 'loss': 0.022952554747462273}
{'epoch': 7, 'step': 700, 'loss': 0.013916729018092155}
{'epoch': 7, 'step': 750, 'loss': 0.06550528109073639}
{'epoch': 7, 'step': 800, 'loss': 0.03831968456506729}
{'epoch': 7, 'step': 850, 'loss': 0.03961867094039917}
{'epoch

04/27/2022 14:24:03 - INFO - __main__ - ***** Running eval *****


Eval Iteration:   0%|          | 0/135 [00:00<?, ?it/s]

04/27/2022 14:24:14 - INFO - __main__ - epoch 7: {'accuracy': 0.8429629629629629}


In [70]:
print(best_epoch)

{'epoch:': 0, 'acc': 0}


# 分析模型

In [None]:
# **計算 Precision, Recall, F1-score **

In [None]:
# **產生 confusion matrix heatmap **

# Inference

In [72]:
from transformers import BertConfig, BertForSequenceClassification
config = BertConfig.from_pretrained("./model/epoch_8/config.json")  #num_labels 設定類別數
model = BertForSequenceClassification.from_pretrained("./model/epoch_8/pytorch_model.bin", config=config)

In [82]:
# **撰寫預測程式**
def FAQ_model(model, question):
  input_encodings = tokenizer([question], truncation=True, padding=True)
  input_dataset = Dataset(input_encodings)
  data_collator = default_data_collator
  input_dataloader = DataLoader(input_dataset, collate_fn=data_collator, batch_size=1)  

  accelerator = Accelerator()
  model, input_dataloader = accelerator.prepare(model, input_dataloader)

  for batch in input_dataloader:
    outputs = model(**batch)
    predicted = outputs.logits.argmax(dim=-1)
  return predicted.item()

In [84]:
questions = ['大型家具及家電如何丟棄', '臺北二二八紀念館開館及參觀時間?交通方式?', '工讀期間可以請假嗎?請假要不要扣薪水?']

for question in questions:
  answer = FAQ_model(model, question)  
  print(le.inverse_transform([answer])[0])

臺北市政府環境保護局環境清潔管理科
臺北市政府文化局
臺北市政府勞動局勞動基準科
