In [2]:
cd ..

/home/is/akiyoshi-n/my-project


In [3]:
import os
# 使用するGPUを指定. この環境変数の場所は，pytorchをimportする前に入れる
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from pathlib import Path
from datetime import datetime
from src.my_project.dataset import load_dataset_4class_Multi_classification, split_test_data_stratify, load_text_dataset, split_multilabel_data
from src.my_project.train_v2 import MultiClassClassifier
from sklearn.model_selection import train_test_split
from src.my_project.dataset import load_multiclass_dataset
import wandb
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [4]:
DATASET_PATH = Path('/home/is/akiyoshi-n/my-project/data')
# 本日の日付
timestamp = datetime.now().strftime("%Y-%m-%d")
# 出力先ディレクトリ
output_dir = Path('/home/is/akiyoshi-n/my-project/outputs/{}'.format(timestamp))
# モデル出力先ディレクトリ
output_model_dir = Path('/home/is/akiyoshi-n/my-project/outputs_model')

### パラメータの設定

In [5]:
# 最大トークン数
MAX_LEN = 128
# バッチサイズ
BATCH_SIZE = 16
# エポック数
NUM_EPOCHS = 100
# 学習率
LEARNING_RATE = 2e-5
# Cross Validation時のFold数
NUM_FOLDS = 3
# 早期停止のための忍耐値
PATIENCE = 10
# 乱数シード
SEED = 2023
# クラス数
NUM_LABELS = 4
# 閾値
THRESH = 0.5

In [5]:
# データの読み込み
data, class_name = load_dataset_4class_Multi_classification(f"{DATASET_PATH}/act_classification_final.xlsx")

In [7]:
len(data['labels'][0])

4

In [8]:
data_labels_np = np.array(data['labels'])
data_labels_np.sum(axis=0)

array([ 95,  95, 336, 600])

### モデル精度評価

In [9]:
# 東北大BERT-v3
MODEL_NAME = 'cl-tohoku/bert-base-japanese-v3'
Classifier_model = MultiClassClassifier(model_name = MODEL_NAME, num_labels=NUM_LABELS, seed=SEED, thresh=THRESH)

In [10]:
# 訓練データと評価データを辞書型で抽出
train_dataset = {
    'texts': [data['texts'][i] for i in range(900)],
    'labels': [data['labels'][i] for i in range(900)]
}
eval_dataset = {
    'texts': [data['texts'][i] for i in range(900, 1100)],
    'labels': [data['labels'][i] for i in range(900, 1100)]
}

In [11]:
trainer = Classifier_model.train_model(train_dataset, eval_dataset, MAX_LEN, NUM_EPOCHS, LEARNING_RATE, BATCH_SIZE, PATIENCE, output_dir, project_name='MultiClassification', run_name='test')

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Parameter 'fn_kwargs'={'tokenizer': BertJapaneseTokenizer(name_or_path='cl-tohoku/bert-base-japanese-v3', vocab_size=32768, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False,

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6298,0.484187,0.57,0.228642
2,0.4904,0.437475,0.575,0.450275
3,0.4243,0.376617,0.665,0.560188
4,0.3285,0.355257,0.635,0.576035
5,0.2184,0.378333,0.655,0.497473
6,0.1362,0.42458,0.69,0.54263


In [12]:
predictions = Classifier_model.predict(trainer, eval_dataset, MAX_LEN)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [13]:
predictions

array([[0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.],


In [24]:
from transformers import AutoTokenizer
from src.my_project.dataset import preprocess_for_Trainer
import numpy as np
import torch
# tokenizerの定義
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
eval_dataset_use = preprocess_for_Trainer(eval_dataset, tokenizer, max_len=MAX_LEN)
predictions = trainer.predict(eval_dataset_use)

# predictions.predictionsにsigmoid関数を適用し，確率に変換
predictions = torch.sigmoid(torch.from_numpy(predictions.predictions))
thresh = 0.5
# predictions = torch.where(predictions > THRESH, 1, 0)
predictions_label = (predictions>thresh).float()

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [25]:
type(predictions)

torch.Tensor

In [26]:
predictions[0]

tensor([0.0440, 0.0457, 0.3518, 0.3285])

In [23]:
if torch.sum(predictions[0]) == 0:
    print(True)

True


In [31]:
predictions_label[0]

tensor([0., 0., 0., 0.])

In [32]:
a[1]

array([0., 0., 0., 1.])

In [33]:
max_index = torch.argmax(predictions[0])
max_index
a[0][max_index] = 1
a[1][:-1] = predictions_label[0][:-1]

wandb: Network error (ReadTimeout), entering retry loop.


In [17]:
a = np.zeros((5,4))
for i in range(5):
    a[i] = predictions[i]

In [19]:
type(a)

numpy.ndarray

In [1]:
predictions

NameError: name 'predictions' is not defined

In [26]:
eval_dataset_use

Dataset({
    features: ['texts', 'labels', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 200
})

In [21]:
from transformers import AutoTokenizer
from src.my_project.dataset import preprocess_for_Trainer
import numpy as np
import torch
# tokenizerの定義
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# データセットの前処理
eval_dataset_use = preprocess_for_Trainer(eval_dataset, tokenizer, max_len=MAX_LEN)
predictions = trainer.predict(eval_dataset_use)
# predictions.predictionsにsigmoid関数を適用し，確率に変換
predictions = torch.sigmoid(torch.from_numpy(predictions.predictions))
# 0.5以上の確率を1，それ以外を0に変換
thresh = 0.5
# predictions = torch.where(predictions > THRESH, 1, 0)
predictions = (predictions>thresh).float()

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [43]:
# 保存したTrainerを読み込む
model = AutoModelForSequenceClassification.from_pretrained('/home/is/akiyoshi-n/my-project/outputs/2024-02-04/cl-tohoku/bert-base-japanese-v32024-02-04T16-22-24/checkpoint-285')
trainer_v2 = Trainer(model=model)

In [19]:
import torch
for i in range(100):
    if torch.sum(predictions[i]) == 0:
        print(True)
        print(predictions[i])

True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])
True
tensor([0., 0., 0., 0.])


In [55]:
# trainerの予測値とeval_dataset['labels']のAccuracyとF1を出す
from sklearn.metrics import accuracy_score, f1_score
accuracy = accuracy_score(eval_dataset['labels'], predictions)
f1 = f1_score(eval_dataset['labels'], predictions, average='macro')
print(f'Accuracy: {accuracy:.4f}')
print(f'F1: {f1:.4f}')

Accuracy: 0.6850
F1: 0.6169


In [10]:
prediction = Classifier_model.predict(trainer, eval_dataset, MAX_LEN)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [13]:
prediction.label_ids

200

In [10]:
# 評価データでの評価
Classifier_model.evaluation(trainer, eval_dataset, MAX_LEN)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

{'eval_loss': 0.33989983797073364,
 'eval_accuracy': 0.545,
 'eval_f1': 0.5774035592587015,
 'eval_runtime': 0.8056,
 'eval_samples_per_second': 248.263,
 'eval_steps_per_second': 6.207,
 'epoch': 10.0}

### Add data

In [11]:
# add_data
add_dataset = load_text_dataset(f"{DATASET_PATH}/add_data_sub.txt.xlsx")

In [12]:
len(add_dataset['texts'])

6887

In [13]:
a = Classifier_model.get_additional_data(trainer, add_dataset, MAX_LEN)

Map:   0%|          | 0/6887 [00:00<?, ? examples/s]

In [14]:
a

tensor([[0.5022, 0.8367, 0.2520, 0.0895],
        [0.8590, 0.6207, 0.3415, 0.0363],
        [0.1242, 0.4935, 0.5855, 0.0768],
        ...,
        [0.6197, 0.7912, 0.1539, 0.0564],
        [0.5725, 0.9073, 0.2780, 0.0567],
        [0.1522, 0.9243, 0.1656, 0.1490]])

In [29]:
# 値が0.9以上かつ0.1未満しかない行を抽出
# 各要素が0.9以上または，0.1未満かどうかのブール値テンソル
b = (a >= 0.7) | (a < 0.3)
# 各行の要素がすべてTrueかどうかのブール値テンソル
c = b.all(axis=1)
# 抽出
len(a[c])

1917

In [30]:
a[c]

tensor([[0.2821, 0.2148, 0.2704, 0.1981],
        [0.0244, 0.0890, 0.0567, 0.8004],
        [0.0224, 0.0437, 0.0637, 0.7985],
        ...,
        [0.2196, 0.9242, 0.2450, 0.0725],
        [0.2991, 0.9164, 0.1451, 0.0792],
        [0.1522, 0.9243, 0.1656, 0.1490]])

In [28]:
# データを抽出
add_dataset['texts'][c]

TypeError: only integer tensors of a single element can be converted to an index

In [25]:
from collections import OrderedDict

# 文字列のリストを取得
texts = add_dataset['texts']

# OrderedDictを使用して重複を削除（順序を保持）
unique_texts = list(OrderedDict.fromkeys(texts))

# 新しい辞書を作成して結果を格納
new_add_dataset = {'texts': unique_texts}

In [26]:
a, b, c = Classifier_model.get_additional_data(trainer, new_add_dataset, MAX_LEN)

Map:   0%|          | 0/6768 [00:00<?, ? examples/s]

In [27]:
a

{'texts': ['舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  'ディズニーランドホテルなう。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  '舞浜地ビールなう。奈緒ちゃんの粋な計らい。',
  'ディズニーランドホテルなう。',
  '舞浜地ビールなう。奈緒ち

In [None]:
wandb.finish()

### Cross Validation

In [5]:
# データの読み込み
data, class_name = load_dataset_4class_Multi_classification(f"{DATASET_PATH}/act_classification_final.xlsx")

In [6]:
data_labels_np = np.array(data['labels'])
data_labels_np.sum(axis=0)

array([ 95,  95, 336, 600])

In [7]:
# testデータと訓練に使用するデータに分割
dataset, test_data = split_multilabel_data(data=data, test_size=0.1, SEED=SEED)

In [8]:
print(np.array(dataset['labels']).sum(axis=0))
print(np.array(test_data['labels']).sum(axis=0))

[ 85  85 302 540]
[10 10 34 60]


In [9]:
# 東北大BERT-v3
MODEL_NAME = 'cl-tohoku/bert-base-japanese-v3'
Classifier_model = MultiClassClassifier(model_name=MODEL_NAME, num_labels=NUM_LABELS, seed=SEED, thresh=THRESH)

In [10]:
result = Classifier_model.cross_validation(dataset, test_data, MAX_LEN, NUM_EPOCHS, LEARNING_RATE, BATCH_SIZE, PATIENCE, NUM_FOLDS, output_dir, project_name='normal_data_4class_weight')

-----------------Fold: 1-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Parameter 'fn_kwargs'={'tokenizer': BertJapaneseTokenizer(name_or_path='cl-tohoku/bert-base-japanese-v3', vocab_size=32768, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False,

Map:   0%|          | 0/657 [00:00<?, ? examples/s]

Map:   0%|          | 0/331 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.629,0.519817,0.574,0.182,"[0.0, 0.0, 0.0, 0.729]","[0.0, 0.0, 0.0, 0.972]","[0.0, 0.0, 0.0, 0.583]"
2,0.5025,0.472133,0.625,0.192,"[0.0, 0.0, 0.0, 0.769]","[0.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.625]"
3,0.44,0.420788,0.694,0.518,"[0.75, 0.4, 0.087, 0.837]","[0.8, 0.6, 0.045, 0.928]","[0.706, 0.3, 1.0, 0.762]"
4,0.3428,0.406188,0.647,0.591,"[0.605, 0.529, 0.441, 0.789]","[0.722, 0.692, 0.349, 0.824]","[0.52, 0.429, 0.6, 0.757]"
5,0.2505,0.409904,0.648,0.582,"[0.619, 0.542, 0.366, 0.803]","[0.65, 0.727, 0.246, 0.9]","[0.591, 0.432, 0.708, 0.724]"
6,0.1718,0.44025,0.623,0.598,"[0.578, 0.542, 0.555, 0.718]","[0.542, 0.565, 0.589, 0.7]","[0.619, 0.52, 0.525, 0.737]"


Map:   0%|          | 0/112 [00:00<?, ? examples/s]

{'eval_loss': 0.4031481444835663, 'eval_accuracy': 0.571, 'eval_macro_f1': 0.575, 'eval_class_f1': [0.632, 0.471, 0.5, 0.698], 'eval_class_recall': [1.0, 0.571, 0.435, 0.682], 'eval_class_precision': [0.462, 0.4, 0.588, 0.714], 'eval_runtime': 0.3532, 'eval_samples_per_second': 317.092, 'eval_steps_per_second': 19.818, 'epoch': 6.0}
-----------------Fold: 2-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/657 [00:00<?, ? examples/s]

Map:   0%|          | 0/331 [00:00<?, ? examples/s]



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112144403159618, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.6303,0.507666,0.652,0.197,"[0.0, 0.0, 0.0, 0.789]","[0.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.652]"
2,0.4954,0.475021,0.5,0.167,"[0.0, 0.0, 0.0, 0.667]","[0.0, 0.0, 0.0, 1.0]","[0.0, 0.0, 0.0, 0.5]"
3,0.4558,0.454587,0.762,0.266,"[0.2, 0.0, 0.0, 0.863]","[0.111, 0.0, 0.0, 1.0]","[1.0, 0.0, 0.0, 0.759]"
4,0.3967,0.432994,0.731,0.458,"[0.522, 0.267, 0.188, 0.855]","[0.4, 0.222, 0.111, 0.982]","[0.75, 0.333, 0.6, 0.757]"
5,0.3292,0.409943,0.681,0.577,"[0.5, 0.538, 0.455, 0.816]","[0.474, 0.583, 0.341, 0.912]","[0.529, 0.5, 0.682, 0.738]"
6,0.2467,0.424367,0.685,0.575,"[0.552, 0.375, 0.6, 0.775]","[0.421, 0.273, 0.616, 0.81]","[0.8, 0.6, 0.584, 0.744]"
7,0.174,0.437759,0.612,0.568,"[0.5, 0.474, 0.604, 0.695]","[0.455, 0.429, 0.79, 0.608]","[0.556, 0.529, 0.489, 0.811]"


Map:   0%|          | 0/112 [00:00<?, ? examples/s]

{'eval_loss': 0.4098375737667084, 'eval_accuracy': 0.595, 'eval_macro_f1': 0.575, 'eval_class_f1': [0.727, 0.308, 0.526, 0.739], 'eval_class_recall': [0.667, 0.4, 0.417, 0.81], 'eval_class_precision': [0.8, 0.25, 0.714, 0.68], 'eval_runtime': 0.3599, 'eval_samples_per_second': 311.224, 'eval_steps_per_second': 19.452, 'epoch': 7.0}
-----------------Fold: 3-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/662 [00:00<?, ? examples/s]

Map:   0%|          | 0/326 [00:00<?, ? examples/s]



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112466061280833, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.6359,0.53677,0.565,0.247,"[0.0, 0.0, 0.272, 0.718]","[0.0, 0.0, 0.194, 0.907]","[0.0, 0.0, 0.452, 0.594]"
2,0.509,0.462834,0.634,0.271,"[0.0, 0.0, 0.316, 0.767]","[0.0, 0.0, 0.2, 1.0]","[0.0, 0.0, 0.75, 0.622]"
3,0.4465,0.436399,0.744,0.35,"[0.0, 0.0, 0.545, 0.853]","[0.0, 0.0, 0.395, 0.988]","[0.0, 0.0, 0.882, 0.75]"
4,0.3755,0.41562,0.633,0.489,"[0.385, 0.25, 0.563, 0.759]","[0.263, 0.143, 0.566, 0.818]","[0.714, 1.0, 0.56, 0.709]"
5,0.2852,0.382857,0.678,0.636,"[0.533, 0.714, 0.496, 0.8]","[0.5, 0.652, 0.395, 0.895]","[0.571, 0.789, 0.667, 0.723]"
6,0.206,0.37532,0.675,0.659,"[0.558, 0.714, 0.596, 0.768]","[0.5, 0.652, 0.585, 0.779]","[0.632, 0.789, 0.608, 0.758]"
7,0.1255,0.436041,0.659,0.587,"[0.5, 0.556, 0.517, 0.776]","[0.44, 0.417, 0.433, 0.873]","[0.579, 0.833, 0.639, 0.699]"
8,0.0755,0.445158,0.643,0.615,"[0.667, 0.526, 0.509, 0.758]","[0.704, 0.4, 0.452, 0.818]","[0.633, 0.769, 0.583, 0.707]"


Map:   0%|          | 0/112 [00:00<?, ? examples/s]

{'eval_loss': 0.46556010842323303, 'eval_accuracy': 0.522, 'eval_macro_f1': 0.497, 'eval_class_f1': [0.625, 0.25, 0.533, 0.579], 'eval_class_recall': [0.5, 0.2, 0.69, 0.512], 'eval_class_precision': [0.833, 0.333, 0.435, 0.667], 'eval_runtime': 0.3512, 'eval_samples_per_second': 318.945, 'eval_steps_per_second': 19.934, 'epoch': 8.0}


In [11]:
# 重みなしの場合の結果（cv=5）
average_accuracy = round(sum(d['eval_accuracy'] for d in result)/len(result), 3)
average_macro_f1 = round(sum(d['eval_macro_f1'] for d in result)/len(result), 3)
# クラスごとの平均値を計算
average_class_f1 = [round(sum(d['eval_class_f1'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_f1']))]
average_class_recall = [round(sum(d['eval_class_recall'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_recall']))]
average_class_precision = [round(sum(d['eval_class_precision'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_precision']))]
print("Average accuracy:", average_accuracy)
print("Average Macro f1:", average_macro_f1)
print("Average Class f1:", average_class_f1)
print("Average Class recall:", average_class_recall)
print("Average Class precision:", average_class_precision)

Average accuracy: 0.622
Average Macro f1: 0.413
Average Class f1: [0.259, 0.111, 0.549, 0.732]
Average Class recall: [0.162, 0.074, 0.664, 0.747]
Average Class precision: [0.667, 0.222, 0.488, 0.74]


In [13]:
# # 重みありの場合の結果（cv=5）
# average_accuracy = round(sum(d['eval_accuracy'] for d in result)/len(result), 3)
# average_macro_f1 = round(sum(d['eval_macro_f1'] for d in result)/len(result), 3)
# # クラスごとの平均値を計算
# average_class_f1 = [round(sum(d['eval_class_f1'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_f1']))]
# average_class_recall = [round(sum(d['eval_class_recall'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_recall']))]
# average_class_precision = [round(sum(d['eval_class_precision'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_precision']))]
# print("Average accuracy:", average_accuracy)
# print("Average Macro f1:", average_macro_f1)
# print("Average Class f1:", average_class_f1)
# print("Average Class recall:", average_class_recall)
# print("Average Class precision:", average_class_precision)

Average accuracy: 0.536
Average Macro f1: 0.518
Average Class f1: [0.659, 0.264, 0.547, 0.601]
Average Class recall: [0.627, 0.333, 0.572, 0.574]
Average Class precision: [0.73, 0.25, 0.57, 0.654]


### majority classの精度

In [12]:
print(np.array(dataset['labels']).sum(axis=0))

[ 85  85 302 540]


In [26]:
# テストサイズ×マルチラベル数の二次元リスト
majority_pred = [[0 for i in range(4)] for j in range(len(test_data['labels']))]
for i in range(len(test_data['labels'])):
    majority_pred[i][-1] = 1

In [28]:
# accuracyを計算
accuracy = round(accuracy_score(y_true=test_data['labels'], y_pred=majority_pred), 3)
# macro f1を計算
macro_f1 = round(f1_score(y_true=test_data['labels'], y_pred=majority_pred, average='macro', zero_division=0), 3)
# クラス毎のF1値を計算
class_f1 = [round(score, 3) for score in f1_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
# クラス毎のrecallを計算
class_recall = [round(score, 3) for score in recall_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
# クラス毎のprecisionを計算
class_precision = [round(score, 3) for score in precision_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
print("Average accuracy:", accuracy)
print("Average Macro f1:", macro_f1)
print("Average Class f1:", class_f1)
print("Average Class recall:", class_recall)
print("Average Class precision:", class_precision)

Average accuracy: 0.536
Average Macro f1: 0.174
Average Class f1: [0.0, 0.0, 0.0, 0.698]
Average Class recall: [0.0, 0.0, 0.0, 1.0]
Average Class precision: [0.0, 0.0, 0.0, 0.536]


### 21クラスマルチクラス分類

In [6]:
# データの読み込み
data, class_name = load_multiclass_dataset(f"{DATASET_PATH}/act_classification_final.xlsx")

In [7]:
data_labels_np = np.array(data['labels'])
data_labels_np.sum(axis=0)

array([ 21,  12,  18,  46,   3,  56,  22,   1,  12,   4, 138,  28,  18,
        81,  11,  54,   5,   1,   6, 600])

In [8]:
# testデータと訓練に使用するデータに分割
dataset, test_data = split_multilabel_data(data=data, test_size=0.1, SEED=SEED)

In [9]:
print(np.array(dataset['labels']).sum(axis=0))
print(np.array(test_data['labels']).sum(axis=0))

[ 19  11  16  41   3  50  20   1  11   4 124  25  16  73  10  49   5   1
   5 540]
[ 2  1  2  5  0  6  2  0  1  0 14  3  2  8  1  5  0  0  1 60]


In [12]:
# 東北大BERT-v3
MODEL_NAME = 'cl-tohoku/bert-base-japanese-v3'
Classifier_model = MultiClassClassifier(model_name=MODEL_NAME, num_labels=20, seed=SEED, thresh=THRESH)

In [14]:
PATIENCE=10
result = Classifier_model.cross_validation(dataset, test_data, MAX_LEN, NUM_EPOCHS, LEARNING_RATE, BATCH_SIZE, PATIENCE, NUM_FOLDS, output_dir, project_name='ChatGPT_data_4class_weight')

-----------------Fold: 1-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/655 [00:00<?, ? examples/s]

Map:   0%|          | 0/332 [00:00<?, ? examples/s]



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112425579792924, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.5902,0.45411,0.009036,0.008997,"[0.0, 0.038, 0.0, 0.037, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.073, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.032]","[0.0, 0.333, 0.0, 0.071, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.049, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.017]","[0.0, 0.02, 0.0, 0.025, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.143, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6]"
2,0.3145,0.195492,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,0.1583,0.123678,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
4,0.1125,0.100055,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
5,0.0969,0.090837,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
6,0.0904,0.086057,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
7,0.0845,0.083101,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
8,0.0816,0.081199,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
9,0.0804,0.079516,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
10,0.0769,0.078642,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


Map:   0%|          | 0/113 [00:00<?, ? examples/s]

{'accuracy': 0.7058823529411765, 'macro_f1': 0.21463203463203465, 'class_f1': [0.0, 0.0, 0.0, 0.8, 0.0, 0.667, 0.0, 0.0, 0.0, 0.0, 0.667, 0.0, 0.0, 0.571, 0.0, 0.8, 0.0, 0.0, 0.0, 0.788], 'class_recall': [0.0, 0.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.667, 0.0, 0.0, 0.667, 0.0, 1.0, 0.0, 0.0, 0.0, 0.812], 'class_precision': [0.0, 0.0, 0.0, 0.667, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.667, 0.0, 0.0, 0.5, 0.0, 0.667, 0.0, 0.0, 0.0, 0.765]}
-----------------Fold: 2-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/661 [00:00<?, ? examples/s]

Map:   0%|          | 0/326 [00:00<?, ? examples/s]



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112430464062425, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.611,0.474928,0.0,0.015868,"[0.046, 0.0, 0.0, 0.143, 0.0, 0.0, 0.0, 0.0, 0.036, 0.031, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.061]","[0.333, 0.0, 0.0, 0.077, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.033]","[0.025, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.019, 0.016, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.353]"
2,0.3309,0.205422,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,0.1635,0.122445,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
4,0.1137,0.097827,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
5,0.0946,0.087952,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
6,0.0872,0.082601,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
7,0.0822,0.079677,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
8,0.0808,0.077865,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
9,0.0787,0.076073,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
10,0.077,0.075373,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


Map:   0%|          | 0/113 [00:00<?, ? examples/s]

{'accuracy': 0.7272727272727273, 'macro_f1': 0.16244444444444445, 'class_f1': [0.0, 0.0, 0.0, 0.889, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.333, 0.0, 0.0, 0.8, 0.0, 0.4, 0.0, 0.0, 0.0, 0.827], 'class_recall': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.667, 0.0, 0.5, 0.0, 0.0, 0.0, 0.886], 'class_precision': [0.0, 0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.333, 0.0, 0.0, 0.0, 0.775]}
-----------------Fold: 3-----------------


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cl-tohoku/bert-base-japanese-v3 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/658 [00:00<?, ? examples/s]

Map:   0%|          | 0/329 [00:00<?, ? examples/s]



VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112303452359305, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Macro F1,Class F1,Class Recall,Class Precision
1,0.6129,0.486523,0.072948,0.031935,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.087, 0.0, 0.0, 0.108, 0.0, 0.0, 0.0, 0.0, 0.0, 0.444]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.049, 0.0, 0.0, 0.083, 0.0, 0.0, 0.0, 0.0, 0.0, 0.339]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.154, 0.0, 0.0, 0.0, 0.0, 0.0, 0.642]"
2,0.3428,0.209833,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
3,0.1677,0.126464,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
4,0.1158,0.100219,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
5,0.0968,0.089691,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
6,0.0876,0.084397,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
7,0.0843,0.081338,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
8,0.0817,0.079521,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
9,0.0801,0.078272,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
10,0.0798,0.077296,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


Map:   0%|          | 0/113 [00:00<?, ? examples/s]

{'accuracy': 0.782608695652174, 'macro_f1': 0.20681159420289857, 'class_f1': [0.0, 0.0, 0.0, 0.8, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.667, 0.0, 0.0, 0.0, 0.87], 'class_recall': [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.833], 'class_precision': [0.0, 0.0, 0.0, 0.667, 0.0, 0.667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.909]}


In [15]:
# 重みありの場合の結果（cv=5）解答
average_accuracy = round(sum(d['accuracy'] for d in result)/len(result), 3)
average_macro_f1 = round(sum(d['macro_f1'] for d in result)/len(result), 3)
# クラスごとの平均値を計算
average_class_f1 = [round(sum(d['class_f1'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_f1']))]
average_class_recall = [round(sum(d['class_recall'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_recall']))]
average_class_precision = [round(sum(d['class_precision'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_precision']))]
print("Average accuracy:", average_accuracy)
print("Average Macro f1:", average_macro_f1)
print("Average Class f1:", average_class_f1)
print("Average Class recall:", average_class_recall)
print("Average Class precision:", average_class_precision)

Average accuracy: 0.739
Average Macro f1: 0.195
Average Class f1: [0.0, 0.0, 0.0, 0.83, 0.0, 0.489, 0.0, 0.0, 0.0, 0.0, 0.333, 0.0, 0.0, 0.79, 0.0, 0.622, 0.0, 0.0, 0.0, 0.828]
Average Class recall: [0.0, 0.0, 0.0, 1.0, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, 0.289, 0.0, 0.0, 0.778, 0.0, 0.833, 0.0, 0.0, 0.0, 0.844]
Average Class precision: [0.0, 0.0, 0.0, 0.711, 0.0, 0.556, 0.0, 0.0, 0.0, 0.0, 0.556, 0.0, 0.0, 0.833, 0.0, 0.5, 0.0, 0.0, 0.0, 0.816]


In [12]:
# 重みありの場合の結果（cv=5）
average_accuracy = round(sum(d['accuracy'] for d in result)/len(result), 3)
average_macro_f1 = round(sum(d['macro_f1'] for d in result)/len(result), 3)
# クラスごとの平均値を計算
average_class_f1 = [round(sum(d['class_f1'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_f1']))]
average_class_recall = [round(sum(d['class_recall'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_recall']))]
average_class_precision = [round(sum(d['class_precision'][i] for d in result) / len(result), 3) for i in range(len(result[0]['class_precision']))]
print("Average accuracy:", average_accuracy)
print("Average Macro f1:", average_macro_f1)
print("Average Class f1:", average_class_f1)
print("Average Class recall:", average_class_recall)
print("Average Class precision:", average_class_precision)

Average accuracy: 0.675
Average Macro f1: 0.212
Average Class f1: [0.0, 0.0, 0.0, 0.867, 0.0, 0.644, 0.222, 0.0, 0.0, 0.0, 0.301, 0.0, 0.333, 0.656, 0.0, 0.413, 0.0, 0.0, 0.0, 0.798]
Average Class recall: [0.0, 0.0, 0.0, 1.0, 0.0, 0.667, 0.167, 0.0, 0.0, 0.0, 0.389, 0.0, 0.333, 0.611, 0.0, 0.333, 0.0, 0.0, 0.0, 0.838]
Average Class precision: [0.0, 0.0, 0.0, 0.778, 0.0, 0.667, 0.333, 0.0, 0.0, 0.0, 0.25, 0.0, 0.333, 0.889, 0.0, 0.556, 0.0, 0.0, 0.0, 0.777]


In [46]:
# # 重みなしの場合の結果（cv=5）
# average_accuracy = round(sum(d['eval_accuracy'] for d in result)/len(result), 3)
# average_macro_f1 = round(sum(d['eval_macro_f1'] for d in result)/len(result), 3)
# # クラスごとの平均値を計算
# average_class_f1 = [round(sum(d['eval_class_f1'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_f1']))]
# average_class_recall = [round(sum(d['eval_class_recall'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_recall']))]
# average_class_precision = [round(sum(d['eval_class_precision'][i] for d in result) / len(result), 3) for i in range(len(result[0]['eval_class_precision']))]
# print("Average accuracy:", average_accuracy)
# print("Average Macro f1:", average_macro_f1)
# print("Average Class f1:", average_class_f1)
# print("Average Class recall:", average_class_recall)
# print("Average Class precision:", average_class_precision)

Average accuracy: 0.724
Average Macro f1: 0.15
Average Class f1: [0.0, 0.0, 0.0, 0.333, 0.0, 0.489, 0.0, 0.0, 0.0, 0.0, 0.424, 0.0, 0.0, 0.624, 0.0, 0.3, 0.0, 0.0, 0.0, 0.831]
Average Class recall: [0.0, 0.0, 0.0, 0.333, 0.0, 0.389, 0.0, 0.0, 0.0, 0.0, 0.39, 0.0, 0.0, 0.556, 0.0, 0.278, 0.0, 0.0, 0.0, 0.916]
Average Class precision: [0.0, 0.0, 0.0, 0.333, 0.0, 0.667, 0.0, 0.0, 0.0, 0.0, 0.532, 0.0, 0.0, 0.833, 0.0, 0.333, 0.0, 0.0, 0.0, 0.767]


### majority classの精度

In [10]:
print(np.array(dataset['labels']).sum(axis=0))

[ 19  11  16  41   3  50  20   1  11   4 124  25  16  73  10  49   5   1
   5 540]


In [28]:
# テストサイズ×マルチラベル数の二次元リスト
majority_pred = [[0 for i in range(20)] for j in range(len(test_data['labels']))]
for i in range(len(test_data['labels'])):
    majority_pred[i][-1] = 1

In [30]:
# accuracyを計算
accuracy = round(accuracy_score(y_true=test_data['labels'], y_pred=majority_pred), 3)
# macro f1を計算
macro_f1 = round(f1_score(y_true=test_data['labels'], y_pred=majority_pred, average='macro', zero_division=0), 3)
# クラス毎のF1値を計算
class_f1 = [round(score, 3) for score in f1_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
# クラス毎のrecallを計算
class_recall = [round(score, 3) for score in recall_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
# クラス毎のprecisionを計算
class_precision = [round(score, 3) for score in precision_score(y_true=test_data['labels'], y_pred=majority_pred, average=None, zero_division=0)]
print("Average accuracy:", accuracy)
print("Average Macro f1:", macro_f1)
print("Average Class f1:", class_f1)
print("Average Class recall:", class_recall)
print("Average Class precision:", class_precision)

Average accuracy: 0.531
Average Macro f1: 0.035
Average Class f1: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.694]
Average Class recall: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
Average Class precision: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.531]


In [15]:
import numpy as np

In [17]:
data_labels_np = np.array(data['labels'])
data_labels_np.sum(axis=0)

array([ 21,  12,  18,  46,   3,  56,  22,   1,  12,   4, 138,  28,  18,
        81,  11,  54,   5,   1,   6, 600])

In [18]:
class_name

array(['1 - A. 睡眠', '1 - B. 身の回りの用事', '1 - C - a. 食事-飲酒あり',
       '1 - C - b. 食事-飲酒なし・不明', '2 - A - a. 通勤・通学', '2 - A - b. その他の移動',
       '2 - B. 仕事', '2 - C. 学業', '2 - D. 家事', '2 - F. 育児',
       '3 - A. メディア（テレビ・ラジオ・新聞・雑誌・ネット）', '3 - B. 買い物',
       '3 - C - a. 趣味・娯楽-体動かす', '3 - C - b. 趣味・娯楽-体動かさない',
       '3 - D - a. 交際-物理的接触', '3 - D - b. 交際-オンライン的接触', '3 - E. 受診・療養',
       '3 - F - a. 喫煙', '3 - F - b. その他の活動(事故など)', '4. 活動なし'],
      dtype=object)