In [12]:
from transformers import BertForSequenceClassification, set_seed
import pandas as pd

# Seed the Python, NumPy and PyTorch RNGs *before* the model is built.
# The checkpoint ships without a sequence-classification head (see the
# "Some weights ... were not initialized" notice this cell prints), so that
# head is drawn at random right here. Without a seed, every kernel restart
# would start fine-tuning from different weights.
SEED = 42  # same value as the TrainingArguments default `seed`
set_seed(SEED)

# Pretrained KcBERT encoder plus a freshly initialised classification head.
# NOTE(review): `num_labels` is left at the library default (2) — confirm it
# matches the number of distinct values in the `label` column.
model = BertForSequenceClassification.from_pretrained("beomi/kcbert-base")

# Training and evaluation tables; a later cell reads the `label` column of both.
df = pd.read_csv('labeled.csv')
eval_df = pd.read_csv('test2.csv')

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

Some weights of the model checkpoint at beomi/kcbert-base were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initiali

In [13]:
from data import tokenized_dataset, load_dataset
from tokenizers import BertWordPieceTokenizer

# WordPiece tokenizer built from the local vocabulary file.
tokenizer = BertWordPieceTokenizer('./vocab_3.txt')

# --- training split -------------------------------------------------------
# Labels are captured while `df` is still the raw frame: the next statement
# rebinds `df` to whatever `tokenized_dataset` returns.
# NOTE(review): because `df` / `eval_df` are rebound, re-running this cell
# without first re-running the CSV-loading cell feeds already-tokenized data
# back in — likely not idempotent; confirm.
labels = list(df['label'])
df = tokenized_dataset(tokenizer, df)
dataset = load_dataset(df, labels)

# --- evaluation split -----------------------------------------------------
eval_labels = list(eval_df['label'])
eval_df = tokenized_dataset(tokenizer, eval_df)
eval_dataset = load_dataset(eval_df, eval_labels)

In [45]:
from transformers import TrainingArguments, Trainer

# Hyper-parameters and bookkeeping settings consumed by the Hugging Face Trainer.
training_args = TrainingArguments(
    do_train=True,
    # --- where artefacts go ---
    output_dir='./results/',          # checkpoints are written here
    logging_dir='./logs',
    # Pin the reporting behaviour explicitly. Leaving `report_to` unset
    # triggers the v5 deprecation notice; 'all' keeps today's behaviour
    # (report to every installed integration).
    report_to='all',
    # --- optimisation ---
    num_train_epochs=5,
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # weight_decay=1e-6,
    # --- log / evaluate / checkpoint cadence (in optimizer steps) ---
    logging_steps=10,
    evaluation_strategy='steps',
    eval_steps=10,
    save_steps=10,                    # kept equal to eval_steps
    save_total_limit=5,               # keep at most 5 checkpoints on disk
    # No `metric_for_best_model` is given, so "best" falls back to the
    # library default (evaluation loss) rather than f1/acc.
    load_best_model_at_end=True,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [46]:
# Resize the model's token-embedding matrix to 30000 rows; the returned
# embedding module is what this cell displays.
# NOTE(review): 30000 is hard-coded — presumably the size of ./vocab_3.txt
# used by the tokenizer; confirm (e.g. against tokenizer.get_vocab_size())
# so that every token id has an embedding row.
model.resize_token_embeddings(30000)

Embedding(30000, 768, padding_idx=0)

In [47]:
# Place the model on the device that TrainingArguments resolved ("PyTorch:
# setting up devices") instead of hard-coding CUDA. On a GPU host this is the
# same placement as `model.cuda()`; on a CPU-only host it no longer raises.
# `.to()` returns the module, so the cell still displays the model repr.
model.to(training_args.device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30000, 768, padding_idx=0)
      (position_embeddings): Embedding(300, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [48]:
from sklearn.metrics import f1_score, accuracy_score
def compute_metrics(preds):
    """Score one evaluation pass for the Trainer.

    Args:
        preds: object exposing ``label_ids`` (reference class ids) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with ``'f1_score'`` (macro-averaged F1) and ``'acc'`` (accuracy).
    """
    reference_ids = preds.label_ids
    predicted_ids = preds.predictions.argmax(-1)

    macro_f1 = f1_score(reference_ids, predicted_ids, average='macro')
    accuracy = accuracy_score(reference_ids, predicted_ids)
    return {'f1_score': macro_f1, 'acc': accuracy}

In [49]:
# Wire the model, its run configuration, both datasets and the metric
# callback into a single Trainer instance.
trainer = Trainer(
    args=training_args,
    model=model,
    # data
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    # called on every evaluation pass; returns {'f1_score', 'acc'}
    compute_metrics=compute_metrics,
)

In [None]:
# MLflow is imported here only to close a leftover run before training.
import mlflow
# Close any MLflow run still active in this kernel.
# NOTE(review): presumably needed so that re-running this cell lets the
# Trainer's MLflow reporting start a fresh run — confirm.
mlflow.end_run()
# Launch fine-tuning; the value returned by train() is the cell's output.
trainer.train()