In [1]:
import os
import json
from functools import partial
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer
from sklearn.metrics import classification_report

In [2]:
from dataset import Dataset_NER, ner_collate_fn
from modeling_ner import Bert_NER
from tag_id_converter import Tag_ID_Converter

In [3]:
# Name of the pretrained checkpoint whose vocabulary the tokenizer is loaded from.
# NOTE(review): "PRETAINED" looks like a typo for "PRETRAINED"; the name is left
# unchanged because other code may reference it.
PRETAINED_MODEL_NAME = 'bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(PRETAINED_MODEL_NAME)

In [4]:
# Directory holding the preprocessed NER files.
# Alternatives kept from earlier revisions (currently disabled):
#   data directory '../0_data/En_NER_POS'
#   dev file name  'prepro_valid.json'
PATH_dir = '../data/ko_NER_data'

# Train / test / dev splits plus the tag-count file, all located under PATH_dir.
PATH_ko_train, PATH_ko_test, PATH_ko_dev, PATH_tag_cnt_dict = [
    os.path.join(PATH_dir, file_name)
    for file_name in (
        'prepro_train.json',
        'prepro_test.json',
        'prepro_dev.json',
        'prepro_tag_cnt.json',
    )
]

In [5]:
# Build one Dataset_NER per split file, in train / test / dev order.
dataset_train, dataset_test, dataset_dev = [
    Dataset_NER(split_path)
    for split_path in (PATH_ko_train, PATH_ko_test, PATH_ko_dev)
]

In [6]:
# Tag <-> id converter constructed from the tag-count JSON file.
tag_converter = Tag_ID_Converter(PATH_tag_cnt_dict)

In [7]:
# Report how many examples each split contains.
for split_name, split_dataset in (
    ('train', dataset_train),
    ('test', dataset_test),
    ('dev', dataset_dev),
):
    print(split_name, len(split_dataset))

# Last expression of the cell: display the id -> tag mapping.
tag_converter.id_to_tag

train 14987
test 3684
dev 3466


{0: '[PAD]',
 1: 'O',
 2: 'B-LOC',
 3: 'I-LOC',
 4: 'B-MISC',
 5: 'I-MISC',
 6: 'B-ORG',
 7: 'I-ORG',
 8: 'B-PER',
 9: 'I-PER'}

In [8]:
# Training hyperparameters.
batch_size = 25

# Number of passes over the training set; read by the training loop as `train_epoch`.
# It was previously not defined anywhere in the notebook, so a fresh-kernel run
# failed with NameError.
# NOTE(review): the value originally used is not recorded in this notebook;
# 3 is a placeholder — confirm.
train_epoch = 3

# Collate function with the tokenizer and tag converter pre-bound as its first
# two positional arguments; the DataLoader supplies the remaining one.
partial_collate_fn = partial(ner_collate_fn, tokenizer, tag_converter)

In [9]:
# Options shared by all three loaders.
_loader_options = dict(batch_size=batch_size, collate_fn=partial_collate_fn)

# Only the training split is shuffled; test and dev keep the dataset order.
dataloader_train = DataLoader(dataset_train, shuffle=True, **_loader_options)
dataloader_test = DataLoader(dataset_test, shuffle=False, **_loader_options)
dataloader_dev = DataLoader(dataset_dev, shuffle=False, **_loader_options)

In [10]:
# Number of entries in the converter's tag -> id mapping; passed to Bert_NER
# when the model is built.
tag_num = len(tag_converter.tag_to_id)
model = Bert_NER(tag_num)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
# Cross-entropy loss that skips targets equal to 0 (id 0 is '[PAD]' in the
# id -> tag mapping displayed earlier in this notebook).
# NOTE(review): evaluation masks with tag_converter.pad_id; presumably that is
# also 0 — confirm, or use it here instead of the literal.
CELoss = nn.CrossEntropyLoss(ignore_index=0)
# AdamW over every model parameter with a learning rate of 1e-5.
optimizer = AdamW(model.parameters(), lr=1.0e-5)

In [28]:
# Preferred CUDA device index for this run.
GPU_INDEX = 6

# Use the preferred GPU when the host actually has that many devices; otherwise
# fall back to GPU 0 instead of failing on an invalid device ordinal.
# With no CUDA device at all, device_count() is 0 and model.cuda(0) still raises,
# as the hard-coded call did before.
gpu_index = GPU_INDEX if GPU_INDEX < torch.cuda.device_count() else 0
model.cuda(gpu_index)

# Read the device back from the model so later cells place tensors alongside it.
device = model.bert.device
print(device)

cuda:6


In [29]:
# Fine-tune the model on the training split for `train_epoch` passes.
for epoch in range(train_epoch):
    model.train()

    for iteration, batch in enumerate(dataloader_train):
        # batch[0] is a mapping of model inputs, batch[1] holds the labels.
        # `.to(device)` is used instead of `.cuda(device)` so the loop also
        # works when `device` is not a CUDA device.
        batch_inputs = {name: tensor.to(device) for name, tensor in batch[0].items()}
        batch_labels = batch[1].to(device)

        output = model(**batch_inputs)
        # Flatten scores to (N, num_tags) and labels to (N,) so the loss is
        # computed over every position in the batch at once.
        loss = CELoss(output.view(-1, output.size(-1)), batch_labels.view(-1))

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        # Log every 10th step (the printed index is the zero-based iteration).
        if (iteration + 1) % 10 == 0:
            print(f'{iteration:3} - loss: {loss.item()}')

    # TODO: after each epoch, evaluate on the dev data and compare performance.
    # TODO: apply early stopping.

  9 - loss: 0.045971594750881195
 19 - loss: 0.07420851290225983
 29 - loss: 0.042183469980955124
 39 - loss: 0.05495704337954521
 49 - loss: 0.044697172939777374
 59 - loss: 0.10525450110435486
 69 - loss: 0.07434636354446411
 79 - loss: 0.0314885675907135
 89 - loss: 0.05568372830748558
 99 - loss: 0.08446627855300903
109 - loss: 0.038816262036561966
119 - loss: 0.01850540190935135
129 - loss: 0.062396954745054245
139 - loss: 0.05694695934653282
149 - loss: 0.1562674343585968
159 - loss: 0.09933022409677505
169 - loss: 0.09043771028518677
179 - loss: 0.016645994037389755
189 - loss: 0.113365039229393
199 - loss: 0.06663306802511215
209 - loss: 0.027949946001172066
219 - loss: 0.08451977372169495
229 - loss: 0.031918298453092575
239 - loss: 0.01505357027053833
249 - loss: 0.036662887781858444
259 - loss: 0.12471558898687363
269 - loss: 0.12075987458229065
279 - loss: 0.09429646283388138
289 - loss: 0.06645599007606506
299 - loss: 0.044448964297771454
309 - loss: 0.01082624588161707
31

In [30]:
# Run the model over the test split and collect gold / predicted tag sequences.
model.eval()

gold_list = []
pred_list = []

with torch.no_grad():
    for batch in dataloader_test:
        # batch[0] is a mapping of model inputs, batch[1] holds the labels.
        # `.to(device)` is used instead of `.cuda(device)` so the loop also
        # works when `device` is not a CUDA device.
        batch_inputs = {name: tensor.to(device) for name, tensor in batch[0].items()}
        batch_labels = batch[1].to(device)

        output = model(**batch_inputs)
        loss = CELoss(output.view(-1, output.size(-1)), batch_labels.view(-1))

        print('loss:', loss.item())
        # Highest-scoring tag id at every position.
        pred_ids = torch.argmax(output, dim=-1)

        for g, p in zip(batch_labels, pred_ids):
            # Keep only the positions whose gold label is not the padding id;
            # the same mask is applied to the predictions so both stay aligned.
            gold_mask = g != tag_converter.pad_id

            gold = tag_converter.convert_ids_to_tags(g[gold_mask].tolist())
            pred = tag_converter.convert_ids_to_tags(p[gold_mask].tolist())
            gold_list.append(gold)
            pred_list.append(pred)

            print(gold)
            print(pred)

loss: 0.03453114256262779
['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PER', 'I-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER']
['B-PER', 'I-PER', 'I-PER', 'I-PER', 'I-PER']
['B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'O', 'B-LOC', 'I-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O']
['B-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'O', 'B-LOC', 'I-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O']
['B-LOC', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'I-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['B-LOC', 'O', 'O', 'O', 'O', 'O', 'B-MISC', 'I-MISC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'B-MISC', 'I-MISC', 'O', 'O', 'O', 'O', 'O']


In [15]:
# Concatenate the per-sentence tag lists into two flat, position-aligned lists.
gold_list_flat, pred_list_flat = [], []
for gold_tags, pred_tags in zip(gold_list, pred_list):
    gold_list_flat.extend(gold_tags)
    pred_list_flat.extend(pred_tags)

In [16]:
# Tags to report on: every key of tag_to_id except the first one
# (presumably '[PAD]', which has id 0 — confirm the key order).
report_labels = list(tag_converter.tag_to_id.keys())[1:]
report_text = classification_report(
    gold_list_flat,
    pred_list_flat,
    labels=report_labels,
    digits=5,
)
print(report_text)

              precision    recall  f1-score   support

           O    0.99370   0.98594   0.98980     50068
       B-LOC    0.93548   0.90408   0.91951      1668
       I-LOC    0.91500   0.87285   0.89343      1628
      B-MISC    0.72215   0.82194   0.76882       702
      I-MISC    0.53084   0.67780   0.59539       838
       B-ORG    0.87194   0.90187   0.88665      1661
       I-ORG    0.89207   0.92558   0.90852      2795
       B-PER    0.95836   0.95362   0.95598      1617
       I-PER    0.96085   0.97176   0.96628      3435

    accuracy                        0.96881     64412
   macro avg    0.86449   0.89060   0.87604     64412
weighted avg    0.97103   0.96881   0.96972     64412



In [22]:
def get_chunk_type(tag_name):
    """Split a tag such as ``'B-PER'`` into its class and its type.

    The tag is split on the FIRST hyphen only, so a type that itself contains
    hyphens is kept whole (``'B-WORK-OF-ART'`` -> ``('B', 'WORK-OF-ART')``).

    Args:
        tag_name: Tag string, e.g. ``'B-LOC'``, ``'I-ORG'`` or ``'O'``.

    Returns:
        A ``(tag_class, tag_type)`` tuple. For a tag without a hyphen both
        elements are the tag itself (``'O'`` -> ``('O', 'O')``).
    """
    # maxsplit=1 yields at most two parts: the class prefix and the remainder.
    parts = tag_name.split('-', 1)
    return parts[0], parts[-1]

In [23]:
def get_chunks(seq):
    """Extract entity chunks from a sequence of tag strings.

    Args:
        seq: Sequence of tags such as ``'O'``, ``'B-PER'``, ``'I-PER'``.

    Returns:
        List of ``(chunk_type, start, end)`` tuples in order of appearance,
        where ``start`` is the index of the chunk's first tag and ``end`` is
        the index just past its last tag.
    """
    outside = "O"

    found = []
    open_type = None
    open_start = None

    for pos, tag in enumerate(seq):
        if tag == outside:
            # An outside tag closes whatever chunk is currently open.
            if open_type is not None:
                found.append((open_type, open_start, pos))
                open_type = open_start = None
            continue

        tag_class, tag_type = get_chunk_type(tag)

        if open_type is None:
            # Any non-outside tag opens a chunk, whatever its class prefix is.
            open_type, open_start = tag_type, pos
            continue

        if tag_class == "B" or tag_type != open_type:
            # A "B" tag or a change of type ends the open chunk and starts a new one here.
            found.append((open_type, open_start, pos))
            open_type, open_start = tag_type, pos

    # A chunk still open at the end of the sequence runs to its last tag.
    if open_type is not None:
        found.append((open_type, open_start, len(seq)))

    return found

In [26]:
def evaluate_ner_F1(total_answers, total_preds):
    """Compute chunk-level precision, recall and F1 (all in percent).

    Gold and predicted tag sequences are paired up one by one; each is turned
    into a set of chunks with ``get_chunks`` and a predicted chunk counts as a
    match only when an identical chunk tuple exists in the gold set.

    Args:
        total_answers: Iterable of gold tag sequences.
        total_preds: Iterable of predicted tag sequences, aligned with
            ``total_answers``.

    Returns:
        ``(precision, recall, F1)`` as floats in the range 0-100. A score whose
        denominator is zero (no predicted chunks, no gold chunks, or no
        matches at all) is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    num_match = num_preds = num_answers = 0

    for answers, preds in zip(total_answers, total_preds):
        answer_chunks = set(get_chunks(answers))
        pred_chunks = set(get_chunks(preds))

        num_match += len(answer_chunks & pred_chunks)
        num_answers += len(answer_chunks)
        num_preds += len(pred_chunks)

    # Guard every division: empty inputs or all-"O" sequences give zero counts.
    precision = 100.0 * num_match / num_preds if num_preds else 0.0
    recall = 100.0 * num_match / num_answers if num_answers else 0.0
    score_sum = precision + recall
    F1 = 2 * precision * recall / score_sum if score_sum else 0.0

    return precision, recall, F1

In [27]:
# Chunk-level (precision, recall, F1) in percent over the sequences collected
# from the test split.
evaluate_ner_F1(gold_list, pred_list)

(85.47921967769297, 89.21742209631728, 87.30832539201248)