## BERTのベースラインをスクリプトから実行できるようにリファクタリング --
### 推論編

In [8]:
from glob import glob

import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from transformers import AutoTokenizer

from bert_utils import *
from config import *

In [2]:
# Identifier of the training run whose saved artifacts are used for inference.
RUN_ID = "refactor"
output_path = f"./output/{RUN_ID}/"

# The run's settings are stored as a JSON object; typ="series" loads it as a
# pandas Series so each entry is reachable as an attribute (settings.<key>).
settings_file = f"{output_path}settings.json"
settings = pd.read_json(settings_file, typ="series")

In [3]:
# Collect every saved checkpoint of the run, in lexicographic order so that
# list position i corresponds to the i-th file name.
model_paths = sorted(glob(f"{settings.output_path}*.pth"))
model_paths

['./output/refactor/model-fold0.pth',
 './output/refactor/model-fold1.pth',
 './output/refactor/model-fold2.pth',
 './output/refactor/model-fold3.pth',
 './output/refactor/model-fold4.pth']

In [4]:
# Load the test frame stored in the run's output directory.
test_feather_file = f"{settings.output_path}test_df.feather"
test_df = pd.read_feather(test_feather_file)

In [5]:
# MeCab options handed to the tokenizer: no bundled dictionary name, and the
# dictionary directory given explicitly through "-d".
# NOTE(review): dic_neologd is not defined in this notebook; presumably it comes
# from the `config` star import — confirm.
mecab_kwargs = {"mecab_dic": None, "mecab_option": f"-d {dic_neologd}"}

tokenizer = AutoTokenizer.from_pretrained(settings.model_name, mecab_kwargs=mecab_kwargs)

In [6]:
# Wrap the test frame in the project's dataset class, using the run's saved
# settings; isTrain=False marks this as the inference-time dataset.
dataset_kwargs = dict(
    tokenizer=tokenizer,
    max_length=settings.max_length,
    num_classes=settings.num_classes,
    text_col="clean_text",
    isTrain=False,
)
test_dataset = HateSpeechDataset(test_df, **dataset_kwargs)

In [9]:
# shuffle=False keeps batches in dataset order; the inference cell assigns the
# returned predictions to test_df columns by position.
test_loader = DataLoader(
    test_dataset,
    batch_size=settings.test_batch_size,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [21]:
# Run each fold's checkpoint over the test loader and collect the per-fold
# class scores in preds_list so a later cell can average them.

# Fail early with a readable message instead of an IndexError mid-loop.
if len(model_paths) < settings.folds:
    raise ValueError(
        f"Expected {settings.folds} fold checkpoints but found {len(model_paths)}: {model_paths}"
    )

preds_list = []
for fold in range(0, settings.folds):
    # Use the last path component rather than a fixed index, so the parsing does
    # not depend on how deeply the output directory is nested.
    fold_id = model_paths[fold].split("/")[-1].split(".")[0]  # e.g. "model-fold0"
    # Fold-independent prefix; the submission cell uses it for its "<model_id>_pred" column.
    model_id = fold_id.split("-")[0]  # e.g. "model"

    preds = inference(settings.model_name, settings.num_classes, model_paths[fold], test_loader, device)

    # Name the columns after the fold-specific id. With the shared model_id as
    # prefix, every fold wrote to the same columns and only the last fold's
    # scores were left in test_df.
    for _class in range(0, settings.num_classes):
        test_df.loc[:, f"{fold_id}_oof_class_{_class}"] = preds[:, _class]

    preds_list.append(preds)

Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model : ./output/refactor/model-fold0.pth


100%|██████████| 51/51 [00:03<00:00, 15.60it/s]
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model : ./output/refactor/model-fold1.pth


100%|██████████| 51/51 [00:03<00:00, 15.58it/s]
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model : ./output/refactor/model-fold2.pth


100%|██████████| 51/51 [00:03<00:00, 15.30it/s]
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model : ./output/refactor/model-fold3.pth


100%|██████████| 51/51 [00:03<00:00, 15.47it/s]
Some weights of the model checkpoint at cl-tohoku/bert-base-japanese-whole-word-masking were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Getting predictions for model : ./output/refactor/model-fold4.pth


100%|██████████| 51/51 [00:03<00:00, 15.27it/s]


In [26]:
# Ensemble: average the per-fold score arrays element-wise, then take the
# index of the highest averaged score in each row as the predicted class.
final_preds = np.mean(np.array(preds_list), axis=0)
test_df[f"{model_id}_pred"] = np.argmax(final_preds, axis=1)

# Attach the predictions to the sample submission by id; the "label" column
# that ships with the sample file is dropped and replaced by the prediction.
submission = pd.read_csv(f"{data_path}sample_submission.csv")
submission = pd.merge(submission, test_df.loc[:, ["id", f"{model_id}_pred"]], how="left", on="id")
submission = submission.drop(["label"], axis=1).rename(columns={f"{model_id}_pred": "label"})

# A left merge leaves NaN for any submission id that has no row in test_df.
# Stop here rather than writing a submission file with missing labels.
n_unlabelled = int(submission["label"].isna().sum())
if n_unlabelled:
    raise ValueError(
        f"{n_unlabelled} submission ids have no prediction in test_df; check that the ids match"
    )

In [34]:
# Write the submission next to the run's other artifacts, without the index column.
submission_file = f"{settings.output_path}submission.csv"
submission.to_csv(submission_file, index=False)