In [1]:
import os
import sys
import glob
import shutil
import numpy as np
from tqdm import tqdm
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
import torch.nn.functional as F

In [3]:
from library import tool
from library import preprocess
from library import score

In [4]:
# Check whether a CUDA-capable GPU is available (project helper from library.tool).
tool.is_cuda_available()

gpu is available


In [5]:
# Load the model/tokenizer settings file (TOML).
model_settings = tool.ReadModelTokenizerTome('./settings/model_tokenizer.toml')
# Bare expression so the notebook displays the loaded settings.
model_settings

[pattern1]
tokenizer = "koheiduck/bert-japanese-finetuned-sentiment"
model = "koheiduck/bert-japanese-finetuned-sentiment"
score_folder = "koheiduck_bert-japanese-finetuned-sentiment"

[pattern2]
tokenizer = "A-Funakoshi/bert-multilingual-sentiments-base"
model = "A-Funakoshi/bert-multilingual-sentiments-base"
score_folder = "bert-multilingual-sentiments-base"

[pattern3]
tokenizer = "A-Funakoshi/bert-finetuned-multilingual-sentiments-adamw"
model = "A-Funakoshi/bert-finetuned-multilingual-sentiments-adamw"
score_folder = "bert-finetuned-multilingual-sentiments-adamw"

[pattern4]
tokenizer = "A-Funakoshi/bert-wrime-base"
model = "A-Funakoshi/bert-wrime-base"
score_folder = "bert-wrime-base"

[pattern5]
tokenizer = "A-Funakoshi/bert-base-japanese-v3-wrime-v2"
model = "A-Funakoshi/bert-base-japanese-v3-wrime-v2"
score_folder = "bert-base-japanese-v3-wrime-v2"

In [6]:
# Choose which settings pattern supplies the model and tokenizer names.
# PATTERN is the name of a section in the settings file (e.g. 'pattern1').
PATTERN = 'pattern1'

# NOTE(review): read() presumably loads the chosen section's values into
# model_settings (tokenizer / model / score_folder) — confirm in library.tool.
model_settings.read(PATTERN)
print(model_settings.get_str())

tokenizer:koheiduck/bert-japanese-finetuned-sentiment
model:koheiduck/bert-japanese-finetuned-sentiment
score_folder:koheiduck_bert-japanese-finetuned-sentiment


In [7]:
# Load the tokenizer and the sequence-classification model named in the selected settings.
tokenizer = AutoTokenizer.from_pretrained(model_settings.tokenizer)
model = AutoModelForSequenceClassification.from_pretrained(model_settings.model)



In [8]:
# Smoke test: run a few sample sentences through a sentiment-analysis pipeline
# built from the already-loaded model and tokenizer, printing each result.
cls = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
for sentence in ("私はとっても幸せ", '吾輩は猫である', "私はとっても不幸"):
    print(cls(sentence))

[{'label': 'POSITIVE', 'score': 0.9896088242530823}]
[{'label': 'NEUTRAL', 'score': 0.9275946021080017}]
[{'label': 'NEGATIVE', 'score': 0.9918387532234192}]


In [38]:
# Tokenize the sample sentence, then move the input tensors to the device the
# model lives on so this cell also works when the model has been moved to a GPU.
tokenized_text = tokenizer("私はとっても幸せ" , padding=True, truncation=True, return_tensors='pt')
tokenized_text = tokenized_text.to(model.device)
# Get scores: forward pass without gradient tracking, softmax over the class
# axis (dim=1), then the index of the highest-probability class.
with torch.no_grad():
    outputs = model(**tokenized_text)
    predictions = F.softmax(outputs.logits, dim=1)
    prediction = torch.argmax(predictions, dim=1)

# .item() yields a plain Python int for the id2label lookup (computed once).
predicted_id = prediction[0].item()

# .cpu() is a no-op for CPU tensors and is required before .numpy() for CUDA tensors.
print('logits :', outputs.logits.cpu().numpy()[0])
print('predictions :', predictions.cpu().numpy()[0])
print('prediction :', predicted_id, model.config.id2label[predicted_id])

logits : [-1.3570833 -2.2782881  3.534339 ]
predictions : [0.00743269 0.0029585  0.9896088 ]
prediction : 2 POSITIVE
