# Fine-tuning BERT for sentiment analysis

In [1]:
import transformers

transformers.__version__

'4.24.0'

In [2]:
from torch import cuda

# Run on the GPU whenever PyTorch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

```python
import wandb

# Start a Weights & Biases run using the library's default settings (no arguments are passed).
# NOTE(review): `report_to="wandb"` is commented out in the TrainingArguments cell — confirm
# whether this run is meant to receive the training logs.
wandb.init()
```

**네이버 영화 리뷰 데이터셋 다운로드**

In [4]:
from datasets import load_dataset

# Slice expressions for the three working subsets. Validation and test are
# both carved out of the dataset's `test` split; the four index ranges do not
# overlap provided that split has at least 20,000 rows (TODO confirm).
_NSMC_SPLITS = {
    "train": "train[:20000]+train[-20000:]",
    "test": "test[:5000]+test[-5000:]",
    "val": "test[5000:10000]+test[-10000:-5000]",
}

nsmc_train = load_dataset('nsmc', split=_NSMC_SPLITS["train"])
nsmc_test = load_dataset('nsmc', split=_NSMC_SPLITS["test"])
nsmc_val = load_dataset('nsmc', split=_NSMC_SPLITS["val"])

Found cached dataset nsmc (/home/mark10/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3)
Found cached dataset nsmc (/home/mark10/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3)
Found cached dataset nsmc (/home/mark10/.cache/huggingface/datasets/nsmc/default/1.1.0/bfd4729bf1a67114e5267e6916b9e4807010aeb238e4a3c2b95fbfa3a014b5f3)


In [5]:
# (rows, columns) of each subset, in train / test / val order.
tuple(subset.shape for subset in (nsmc_train, nsmc_test, nsmc_val))

((40000, 3), (10000, 3), (10000, 3))

**모델 및 토크나이저 다운로드**
- 사전 학습된 다국어 BERT 모델 사용

In [6]:
from transformers import BertForSequenceClassification, BertTokenizerFast

# One checkpoint name shared by the tokenizer and the model so the two cannot drift apart.
pretrained_checkpoint = "bert-base-multilingual-uncased"

tokenizer = BertTokenizerFast.from_pretrained(pretrained_checkpoint)
# num_labels=2 requests a two-class sequence-classification head.
model = BertForSequenceClassification.from_pretrained(
    pretrained_checkpoint,
    num_labels=2,
)

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [7]:
# Report the model size in millions of parameters.
params_in_millions = model.num_parameters() / 1_000_000
print(f"{params_in_millions} million parameters")

167.357954 million parameters


In [8]:
# Fixed sequence length (in tokens) of every encoded example.
MAX_SEQ_LENGTH = 128


def tokenize_batch(batch):
    """Tokenize one batch of reviews into fixed-length BERT inputs.

    Args:
        batch: mapping with a ``'document'`` entry holding the raw review
            texts of the batch (this is what ``Dataset.map(batched=True)``
            passes in).

    Returns:
        The tokenizer output for the batch, with every sequence truncated or
        padded to exactly ``MAX_SEQ_LENGTH`` tokens.
    """
    # padding='max_length' instead of padding=True: with padding=True each
    # 1000-row map batch is padded only to its own longest row, so rows coming
    # from different map batches can end up with different lengths and cannot
    # be stacked by a collator that does not pad.
    return tokenizer(
        batch['document'],
        max_length=MAX_SEQ_LENGTH,
        padding='max_length',
        truncation=True,
    )


enc_train = nsmc_train.map(tokenize_batch, batched=True, batch_size=1000)
enc_test = nsmc_test.map(tokenize_batch, batched=True, batch_size=1000)
enc_val = nsmc_val.map(tokenize_batch, batched=True, batch_size=1000)

  0%|          | 0/40 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

  0%|          | 0/10 [00:00<?, ?ba/s]

In [9]:
import pandas as pd

# Preview the first 10 encoded rows. Slicing the dataset first means the
# DataFrame is built from 10 rows only, instead of converting the whole
# training set just to keep its head.
pd.DataFrame(enc_train[:10])

Unnamed: 0,id,document,label,input_ids,token_type_ids,attention_mask
0,9976970,아 더빙.. 진짜 짜증나네요 목소리,0,"[101, 1174, 25539, 23236, 29234, 13045, 119, 1...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,3819312,흠...포스터보고 초딩영화줄....오버연기조차 가볍지 않구나,1,"[101, 1181, 49904, 13503, 119, 119, 119, 1180,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
2,10265843,너무재밓었다그래서보는것을추천한다,0,"[101, 1165, 33645, 32261, 35748, 22699, 97109,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,9045019,교도소 이야기구먼 ..솔직히 재미는 없다..평점 조정,0,"[101, 1163, 44840, 12265, 22585, 12398, 93197,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,6483659,사이몬페그의 익살스런 연기가 돋보였던 영화!스파이더맨에서 늙어보이기만 했던 커스틴 ...,1,"[101, 1172, 47042, 38914, 11830, 97086, 40389,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
5,5403919,막 걸음마 뗀 3세부터 초등학교 1학년생인 8살용영화.ㅋㅋㅋ...별반개도 아까움.,0,"[101, 1169, 93871, 1163, 84098, 31999, 23823, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
6,7797314,원작의 긴장감을 제대로 살려내지못했다.,0,"[101, 1174, 97090, 11830, 31542, 10576, 1163, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
7,9443947,별 반개도 아깝다 욕나온다 이응경 길용우 연기생활이몇년인지..정말 발로해도 그것보단...,0,"[101, 1170, 59817, 1170, 26646, 22123, 12265, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
8,7156791,액션이 없는데도 재미 있는 몇안되는 영화,1,"[101, 1174, 48657, 39354, 11112, 39967, 26872,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
9,5912145,왜케 평점이 낮은건데? 꽤 볼만한데.. 헐리우드식 화려함에만 너무 길들여져 있나?,1,"[101, 1174, 83955, 81261, 1180, 42908, 34619, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [10]:
# Token length of the first encoded example, for each model input column.
tuple(
    len(enc_train[column][0])
    for column in ('input_ids', 'token_type_ids', 'attention_mask')
)

(128, 128, 128)

In [11]:
# Number of training epochs for the baseline run (passed as num_train_epochs below).
epochs = 10
# Per-device batch size, used for both training and evaluation below.
batch_size = 16

In [12]:
from transformers import TrainingArguments, Trainer

# Configuration of the baseline fine-tuning run; the same object is later
# reused by the hyperparameter-search Trainer.
training_args = TrainingArguments(
    # The output directory where the model predictions and checkpoints will be written
    output_dir='./nsmcModel', 
    do_train=True,
    do_eval=True,

    # Number of training epochs (taken from `epochs` defined above)
    num_train_epochs=epochs,              
    # Same per-device batch size for training and for evaluation
    per_device_train_batch_size=batch_size,  
    per_device_eval_batch_size=batch_size,
    
    # Number of steps used for a linear warmup
    warmup_steps=500,                
    weight_decay=0.01,
    # Log training metrics every `logging_steps` steps
    logging_strategy='steps', 
    
    # TensorBoard log directory               
    logging_dir='./logs',            
    logging_steps=50,
    
    # Evaluate and checkpoint once per epoch (other options: "no", "steps")
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Mixed-precision training, enabled only when a CUDA device is available
    fp16=cuda.is_available(),
    # Reload the best checkpoint once training finishes.
    # NOTE(review): metric_for_best_model is not set; the "score" printed when the
    # best model is loaded matches the validation loss, so "best" presumably means
    # lowest eval loss — confirm that is the intended criterion.
    load_best_model_at_end=True,
    # Explicit W&B reporting is commented out.
    # NOTE(review): the training log still reports automatic W&B logging as enabled.
#     report_to="wandb"
)

In [13]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(pred):
    """Compute classification metrics for one Trainer evaluation pass.

    Args:
        pred: evaluation output exposing ``label_ids`` (the true class ids)
            and ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with ``'Accuracy'`` plus macro-averaged ``'F1'``, ``'Precision'``
        and ``'Recall'``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 yields the same numbers as the default behaviour but
    # without emitting an undefined-metric warning on every evaluation in
    # which the model never predicts one of the classes (this happens in the
    # collapsed hyperparameter-search trials).
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0)
    acc = accuracy_score(labels, preds)

    return {
        'Accuracy': acc,
        'F1': f1,
        'Precision': precision,
        'Recall': recall
    }

In [14]:
# Baseline Trainer: fine-tunes `model` on the training subset and evaluates on
# the validation subset at the cadence configured in `training_args`.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=enc_train,
    eval_dataset=enc_val,
    # Pass the tokenizer as well, matching the Trainer built for the
    # hyperparameter search further down: its files are then written next to
    # every checkpoint, and batches are collated with tokenizer-aware padding.
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

Using cuda_amp half precision backend


In [15]:
# Run the fine-tuning loop; the value returned by train() is kept in `results`.
results=trainer.train()

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 40000
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 25000
  Number of trainable parameters = 167357954
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4006,0.39944,0.8166,0.816184,0.820111,0.816881
2,0.3604,0.43943,0.8319,0.831802,0.833001,0.832054
3,0.338,0.393685,0.8395,0.839443,0.840252,0.839627
4,0.2481,0.534726,0.8261,0.825256,0.831659,0.825749
5,0.2275,0.501829,0.8369,0.836874,0.837303,0.836993
6,0.2179,0.588248,0.8401,0.840009,0.840598,0.839995
7,0.1393,0.662704,0.8423,0.842231,0.842656,0.842212
8,0.1077,0.675312,0.8444,0.844209,0.845668,0.844236
9,0.0754,0.737136,0.843,0.842979,0.843061,0.842962
10,0.0859,0.785487,0.8438,0.843768,0.84392,0.843748


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/checkpoint-2500
Configuration saved in ./nsmcModel/checkpoint-2500/config.json
Model weights saved in ./nsmcModel/checkpoint-2500/pytorch_model.bin
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/checkpoint-5000
Configuration saved in ./nsmcModel/checkpoint-5000/config.json
Mod

In [16]:
# Evaluate the trained model on each subset and tabulate the first five
# reported metrics, one row per subset.
eval_splits = {"train": enc_train, "val": enc_val, "test": enc_test}
split_metrics = [
    trainer.evaluate(eval_dataset=subset) for subset in eval_splits.values()
]

pd.DataFrame(split_metrics, index=list(eval_splits)).iloc[:, :5]

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 40000
  Batch size = 16


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16


Unnamed: 0,eval_loss,eval_Accuracy,eval_F1,eval_Precision,eval_Recall
train,0.208184,0.9281,0.928055,0.92882,0.927991
val,0.393685,0.8395,0.839443,0.840252,0.839627
test,0.367399,0.8455,0.845367,0.846488,0.84542


## Save model

In [18]:
# Save the fine-tuned model and its tokenizer into the same directory so both
# can be reloaded later with from_pretrained().
# NOTE(review): with load_best_model_at_end=True the saved weights are presumably
# those of the best checkpoint rather than the last epoch — confirm.
model_save_path = "best_nsmc_model"
trainer.save_model(model_save_path)
tokenizer.save_pretrained(model_save_path)

Saving model checkpoint to best_nsmc_model
Configuration saved in best_nsmc_model/config.json
Model weights saved in best_nsmc_model/pytorch_model.bin
tokenizer config file saved in best_nsmc_model/tokenizer_config.json
Special tokens file saved in best_nsmc_model/special_tokens_map.json


('best_nsmc_model/tokenizer_config.json',
 'best_nsmc_model/special_tokens_map.json',
 'best_nsmc_model/vocab.txt',
 'best_nsmc_model/added_tokens.json',
 'best_nsmc_model/tokenizer.json')

## Prediction

In [19]:
def get_prediction(text):
    """Classify one review (or a list of reviews) with the fine-tuned model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: a single string, or a list of strings scored as one batch.

    Returns:
        ``(probs, label)``. ``probs`` is the softmax over the class logits,
        one row per input text. ``label`` is the index of the most probable
        class: a scalar tensor for a single string (so ``.item()`` keeps
        working), or one index per row when a list is given.
    """
    import torch  # local import: the notebook only imports `cuda` from torch

    inputs = tokenizer(text, padding=True, truncation=True, max_length=128, return_tensors="pt").to(device)
    # Inference only: switch off dropout and skip gradient tracking so that
    # the result is deterministic and no autograd graph is kept alive.
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
    probs = outputs[0].softmax(1)
    # Arg-max per row; a bare argmax() would return an index into the
    # flattened (batch * classes) tensor, which is wrong for batched input.
    labels = probs.argmax(dim=1)
    if isinstance(text, str):
        labels = labels.squeeze(0)
    return probs, labels

In [22]:
# Make sure the model is on the same device that get_prediction moves its inputs to.
model.to(device)
text = "영화가 재밌지도 않고 재미없지도 않는 그런 영화입니다."
# Element [1] is the predicted class index; .item() turns it into a plain Python number.
get_prediction(text)[1].item()

0

## Use pipeline

In [25]:
from transformers import pipeline, BertForSequenceClassification, BertTokenizerFast

# Reload the saved model and tokenizer from disk under their own names.
# Rebinding the notebook-level `model` / `tokenizer` here would silently swap
# the model used by get_prediction() for a freshly loaded copy that has not
# been moved to `device`.
best_model = BertForSequenceClassification.from_pretrained("best_nsmc_model")
best_tokenizer = BertTokenizerFast.from_pretrained("best_nsmc_model")
nlp = pipeline("sentiment-analysis", model=best_model, tokenizer=best_tokenizer)
nlp("영화가 수작이네요")

loading configuration file best_nsmc_model/config.json
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-uncased",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 105879
}

loading weights file be

[{'label': 'LABEL_1', 'score': 0.9623047709465027}]

In [28]:
# Same mixed-sentiment review that was scored with get_prediction() earlier, now through the pipeline.
nlp("영화가 재밌지도 않고 재미없지도 않는 그런 영화입니다.")

[{'label': 'LABEL_0', 'score': 0.528073251247406}]

---

In [29]:
def model_init():
    """Build a new two-label classifier from the pretrained multilingual BERT checkpoint.

    Handed to ``Trainer(model_init=...)`` so that a model can be created on
    demand instead of reusing an already fine-tuned instance.
    """
    fresh_model = BertForSequenceClassification.from_pretrained(
        "bert-base-multilingual-uncased",
        num_labels=2,
    )
    return fresh_model

In [36]:
# Trainer used for the hyperparameter search: it receives `model_init` instead
# of a model instance and reuses the baseline `training_args`.
# Note: this rebinds `trainer`, replacing the baseline Trainer created earlier.
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=enc_train,
    eval_dataset=enc_val,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 105879
}

loading weights file pytorch_mo

In [37]:
# Run 10 hyperparameter-search trials and keep the one with the highest objective value.
# NOTE(review): neither hp_space nor compute_objective is passed, so the library
# defaults apply; the logged trial values (about 3.2-3.3) look like the sum of
# Accuracy + F1 + Precision + Recall — confirm that this is the intended objective.
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")

[32m[I 2022-11-16 13:00:36,484][0m A new study created in memory with name: no-name-690f2455-6997-44f0-9326-841f03a5ceef[0m
Trial: {'learning_rate': 3.905321149925238e-06, 'num_train_epochs': 1, 'seed': 20, 'per_device_train_batch_size': 4}
loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "poole

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669162568481017, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6124,0.556933,0.8059,0.8059,0.805914,0.805919


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-0/checkpoint-10000
Configuration saved in ./nsmcModel/run-0/checkpoint-10000/config.json
Model weights saved in ./nsmcModel/run-0/checkpoint-10000/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-0/checkpoint-10000/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-0/checkpoint-10000/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./nsmcModel/run-0/checkpoint-10000 (score: 0.5569326281547546).
[32m[I 2022-11-16 13:18:25,982][0m Trial 0 finished with value: 3.223632168007785 and paramete

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/Accuracy,0.8059
eval/F1,0.8059
eval/Precision,0.80591
eval/Recall,0.80592
eval/loss,0.55693
eval/runtime,24.2096
eval/samples_per_second,413.059
eval/steps_per_second,25.816
train/epoch,1.0
train/global_step,10000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666953828341017, max=1.0)…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3583,0.389962,0.8337,0.8337,0.833734,0.833728


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-1/checkpoint-2500
Configuration saved in ./nsmcModel/run-1/checkpoint-2500/config.json
Model weights saved in ./nsmcModel/run-1/checkpoint-2500/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-1/checkpoint-2500/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-1/checkpoint-2500/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./nsmcModel/run-1/checkpoint-2500 (score: 0.38996249437332153).
[32m[I 2022-11-16 13:26:14,346][0m Trial 1 finished with value: 3.334861801874644 and parameters: {

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████

0,1
eval/Accuracy,0.8337
eval/F1,0.8337
eval/Precision,0.83373
eval/Recall,0.83373
eval/loss,0.38996
eval/runtime,24.233
eval/samples_per_second,412.66
eval/steps_per_second,25.791
train/epoch,1.0
train/global_step,2500.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668111299319815, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4544,0.432421,0.8001,0.799674,0.803252,0.800372
2,0.3933,0.394286,0.8233,0.823185,0.82385,0.823186
3,0.3781,0.383514,0.8331,0.83303,0.833426,0.833014
4,0.2931,0.390989,0.8352,0.835184,0.835229,0.835171


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-2/checkpoint-1250
Configuration saved in ./nsmcModel/run-2/checkpoint-1250/config.json
Model weights saved in ./nsmcModel/run-2/checkpoint-1250/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-2/checkpoint-1250/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-2/checkpoint-1250/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
*

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁▆██
eval/F1,▁▆██
eval/Precision,▁▆██
eval/Recall,▁▆██
eval/loss,█▃▁▂
eval/runtime,█▃▁▃
eval/samples_per_second,▁▆█▆
eval/steps_per_second,▁▆█▆
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/Accuracy,0.8352
eval/F1,0.83518
eval/Precision,0.83523
eval/Recall,0.83517
eval/loss,0.39099
eval/runtime,22.8392
eval/samples_per_second,437.844
eval/steps_per_second,27.365
train/epoch,4.0
train/global_step,5000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016668447483486185, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4917,0.479512,0.7748,0.774765,0.774851,0.774756
2,0.4156,0.41797,0.81,0.809994,0.809994,0.809994
3,0.3792,0.402267,0.8248,0.824726,0.825633,0.824935
4,0.3698,0.395241,0.8258,0.82578,0.825842,0.825766
5,0.3338,0.397338,0.8269,0.826889,0.826909,0.82688


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-3/checkpoint-1250
Configuration saved in ./nsmcModel/run-3/checkpoint-1250/config.json
Model weights saved in ./nsmcModel/run-3/checkpoint-1250/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-3/checkpoint-1250/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-3/checkpoint-1250/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
*

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁▆███
eval/F1,▁▆███
eval/Precision,▁▆███
eval/Recall,▁▆███
eval/loss,█▃▂▁▁
eval/runtime,▆▁▄█▁
eval/samples_per_second,▃█▄▁█
eval/steps_per_second,▃█▄▁█
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/Accuracy,0.8269
eval/F1,0.82689
eval/Precision,0.82691
eval/Recall,0.82688
eval/loss,0.39734
eval/runtime,22.2582
eval/samples_per_second,449.273
eval/steps_per_second,28.08
train/epoch,5.0
train/global_step,6250.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666982110279302, max=1.0)…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4388,0.42947,0.8165,0.816498,0.816578,0.816542
2,0.4151,0.448823,0.8284,0.827712,0.832948,0.828083
3,0.3124,0.463327,0.8441,0.844087,0.844356,0.844174
4,0.3471,0.492081,0.8459,0.8459,0.845929,0.845926


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-4/checkpoint-5000
Configuration saved in ./nsmcModel/run-4/checkpoint-5000/config.json
Model weights saved in ./nsmcModel/run-4/checkpoint-5000/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-4/checkpoint-5000/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-4/checkpoint-5000/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
*

0,1
eval/Accuracy,▁▄██
eval/F1,▁▄██
eval/Precision,▁▅██
eval/Recall,▁▄██
eval/loss,▁▃▅█
eval/runtime,▅▁█▄
eval/samples_per_second,▄█▁▅
eval/steps_per_second,▄█▁▅
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/Accuracy,0.8459
eval/F1,0.8459
eval/Precision,0.84593
eval/Recall,0.84593
eval/loss,0.49208
eval/runtime,23.0237
eval/samples_per_second,434.336
eval/steps_per_second,27.146
train/epoch,4.0
train/global_step,20000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669193899724634, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6997,0.693446,0.5027,0.334531,0.25135,0.5


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-11-16 15:16:14,203][0m Trial 5 pruned. [0m
Trial: {'learning_rate': 1.4695389183741484e-06, 'num_train_epochs': 4, 'seed': 6, 'per_device_train_batch_size': 8}
loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidde

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/Accuracy,0.5027
eval/F1,0.33453
eval/Precision,0.25135
eval/Recall,0.5
eval/loss,0.69345
eval/runtime,22.9298
eval/samples_per_second,436.114
eval/steps_per_second,27.257
train/epoch,1.0
train/global_step,10000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669409264189503, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4604,0.464678,0.7889,0.788769,0.789932,0.789057


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
[32m[I 2022-11-16 15:26:52,344][0m Trial 6 pruned. [0m
Trial: {'learning_rate': 5.546510628978957e-05, 'num_train_epochs': 5, 'seed': 13, 'per_device_train_batch_size': 4}
loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermedi

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/Accuracy,0.7889
eval/F1,0.78877
eval/Precision,0.78993
eval/Recall,0.78906
eval/loss,0.46468
eval/runtime,22.2005
eval/samples_per_second,450.441
eval/steps_per_second,28.153
train/epoch,1.0
train/global_step,5000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.0166684675651292, max=1.0))…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7153,0.69408,0.5027,0.334531,0.25135,0.5


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
[32m[I 2022-11-16 15:43:50,546][0m Trial 7 pruned. [0m
Trial: {'learning_rate': 1.9953322694826847e-05, 'num_train_epochs': 1, 'seed': 2, 'per_device_train_batch_size': 4}
loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidde

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
eval/Accuracy,0.5027
eval/F1,0.33453
eval/Precision,0.25135
eval/Recall,0.5
eval/loss,0.69408
eval/runtime,22.0384
eval/samples_per_second,453.754
eval/steps_per_second,28.36
train/epoch,1.0
train/global_step,10000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666853219891588, max=1.0)…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5278,0.557686,0.8273,0.827264,0.827419,0.827246


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/run-8/checkpoint-10000
Configuration saved in ./nsmcModel/run-8/checkpoint-10000/config.json
Model weights saved in ./nsmcModel/run-8/checkpoint-10000/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/run-8/checkpoint-10000/tokenizer_config.json
Special tokens file saved in ./nsmcModel/run-8/checkpoint-10000/special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./nsmcModel/run-8/checkpoint-10000 (score: 0.5576856732368469).
[32m[I 2022-11-16 16:01:22,054][0m Trial 8 finished with value: 3.309228143762589 and paramete

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/Accuracy,▁
eval/F1,▁
eval/Precision,▁
eval/Recall,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████

0,1
eval/Accuracy,0.8273
eval/F1,0.82726
eval/Precision,0.82742
eval/Recall,0.82725
eval/loss,0.55769
eval/runtime,22.4272
eval/samples_per_second,445.888
eval/steps_per_second,27.868
train/epoch,1.0
train/global_step,10000.0


Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666810973159348, max=1.0)…

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3756,0.423643,0.8171,0.816401,0.821255,0.816791


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
[32m[I 2022-11-16 16:08:58,911][0m Trial 9 pruned. [0m


In [38]:
# Show the selected run; its repr lists the run id, the objective value and
# the hyperparameters (presumably the result of the hyperparameter search
# whose trials are logged above; confirm against the cell that assigns it).
best_run

BestRun(run_id='4', objective=3.3836549267730573, hyperparameters={'learning_rate': 4.180462722220778e-06, 'num_train_epochs': 4, 'seed': 18, 'per_device_train_batch_size': 8})

In [39]:
# Transfer every hyperparameter of the selected run onto the trainer's
# arguments, one attribute per entry, before starting the training run.
best_hparams = best_run.hyperparameters
for name in best_hparams:
    setattr(trainer.args, name, best_hparams[name])

# Last expression of the cell, so the value returned by train() is displayed.
trainer.train()

loading configuration file config.json from cache at /home/mark10/.cache/huggingface/hub/models--bert-base-multilingual-uncased/snapshots/800c34f3d5aa174fe531f560b44b8d14592225b7/config.json
Model config BertConfig {
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "transformers_version": "4.24.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 105879
}

loading weights file pytorch_mo

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4388,0.42947,0.8165,0.816498,0.816578,0.816542
2,0.4151,0.448823,0.8284,0.827712,0.832948,0.828083
3,0.3124,0.463327,0.8441,0.844087,0.844356,0.844174
4,0.3471,0.492081,0.8459,0.8459,0.845929,0.845926


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 10000
  Batch size = 16
Saving model checkpoint to ./nsmcModel/checkpoint-5000
Configuration saved in ./nsmcModel/checkpoint-5000/config.json
Model weights saved in ./nsmcModel/checkpoint-5000/pytorch_model.bin
tokenizer config file saved in ./nsmcModel/checkpoint-5000/tokenizer_config.json
Special tokens file saved in ./nsmcModel/checkpoint-5000/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: id, document. If id, document are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****


TrainOutput(global_step=20000, training_loss=0.3852480537891388, metrics={'train_runtime': 2514.4537, 'train_samples_per_second': 63.632, 'train_steps_per_second': 7.954, 'total_flos': 1.05244422144e+16, 'train_loss': 0.3852480537891388, 'epoch': 4.0})