In [1]:
import torch
import numpy as np
import evaluate
import gc

from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

In [2]:
# Setting seed for reproducibility and checking whether cuda is on

torch.manual_seed(16)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# We will use a smaller version of ruBert to better understand Russian semantics

tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
model = AutoModelForSequenceClassification.from_pretrained(
    "cointegrated/rubert-tiny2", num_labels=3
)

Some weights of the model checkpoint at cointegrated/rubert-tiny2 were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not 

In [4]:
# Loading dataset and fixing false 'train_test_split'

dataset = load_dataset("csv", data_files="data/text-target.csv")
dataset = dataset["train"]
dataset = dataset.rename_column(
    "target", "labels"
)  # for compatibility with transformers trainer

Using custom data configuration default-8049d2a1ed22d6e9
Found cached dataset csv (C:/Users/al_po/.cache/huggingface/datasets/csv/default-8049d2a1ed22d6e9/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


  0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
# Defining preprocessing for text to get embedding lookup table and attention


def preprocess_function(examples):
    return tokenizer(
        examples["text"], padding="max_length", truncation=True, return_tensors="pt"
    )

In [6]:
dataset = dataset.map(preprocess_function, batched=True)  # processing dataset

Loading cached processed dataset at C:/Users/al_po/.cache/huggingface/datasets/csv/default-8049d2a1ed22d6e9/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317\cache-b27e36eba33aa001.arrow


In [7]:
dataset = dataset.train_test_split()


In [8]:
# Leaving only the necessary for training columns

train_dataset = dataset["train"].shuffle(seed=16).remove_columns(["text"])
test_dataset = dataset["test"].remove_columns(["text"])

In [9]:
# We will max the f1-macro

metric = evaluate.load("f1")

In [10]:
# Function for trainer evaluation


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels, average="macro")

In [13]:
# Configuring Trainer
# Check 'rubert_hp_search' for hyperparameter search example

training_args = TrainingArguments(
    output_dir="models/rubert_fine_tune/",
    evaluation_strategy="epoch",
    learning_rate=1.7e-5,
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    weight_decay=0.24,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [17]:
# Cleaning cuda cache before training

torch.cuda.empty_cache()
gc.collect()

59

In [15]:
trainer.train()


***** Running training *****
  Num examples = 9750
  Num Epochs = 4
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 9752
  Number of trainable parameters = 29194707


  0%|          | 0/9752 [00:00<?, ?it/s]

Saving model checkpoint to models/rubert_fine_tune/checkpoint-500
Configuration saved in models/rubert_fine_tune/checkpoint-500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-500\pytorch_model.bin


{'loss': 0.83, 'learning_rate': 1.6128383921246925e-05, 'epoch': 0.21}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-1000
Configuration saved in models/rubert_fine_tune/checkpoint-1000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-1000\pytorch_model.bin


{'loss': 0.5514, 'learning_rate': 1.5256767842493846e-05, 'epoch': 0.41}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-1500
Configuration saved in models/rubert_fine_tune/checkpoint-1500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-1500\pytorch_model.bin


{'loss': 0.5127, 'learning_rate': 1.4385151763740772e-05, 'epoch': 0.62}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-2000
Configuration saved in models/rubert_fine_tune/checkpoint-2000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-2000\pytorch_model.bin


{'loss': 0.5551, 'learning_rate': 1.3513535684987695e-05, 'epoch': 0.82}


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 4


  0%|          | 0/813 [00:00<?, ?it/s]

{'eval_loss': 0.5537195205688477, 'eval_f1': 0.8467055484151939, 'eval_runtime': 56.8316, 'eval_samples_per_second': 57.186, 'eval_steps_per_second': 14.305, 'epoch': 1.0}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-2500
Configuration saved in models/rubert_fine_tune/checkpoint-2500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-2500\pytorch_model.bin


{'loss': 0.5791, 'learning_rate': 1.2641919606234618e-05, 'epoch': 1.03}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-3000
Configuration saved in models/rubert_fine_tune/checkpoint-3000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-3000\pytorch_model.bin


{'loss': 0.5432, 'learning_rate': 1.1770303527481541e-05, 'epoch': 1.23}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-3500
Configuration saved in models/rubert_fine_tune/checkpoint-3500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-3500\pytorch_model.bin


{'loss': 0.503, 'learning_rate': 1.0898687448728466e-05, 'epoch': 1.44}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-4000
Configuration saved in models/rubert_fine_tune/checkpoint-4000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-4000\pytorch_model.bin


{'loss': 0.5095, 'learning_rate': 1.002707136997539e-05, 'epoch': 1.64}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-4500
Configuration saved in models/rubert_fine_tune/checkpoint-4500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-4500\pytorch_model.bin


{'loss': 0.4917, 'learning_rate': 9.155455291222314e-06, 'epoch': 1.85}


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 4


  0%|          | 0/813 [00:00<?, ?it/s]

{'eval_loss': 0.5713302493095398, 'eval_f1': 0.850105980490263, 'eval_runtime': 126.3777, 'eval_samples_per_second': 25.717, 'eval_steps_per_second': 6.433, 'epoch': 2.0}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-5000
Configuration saved in models/rubert_fine_tune/checkpoint-5000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-5000\pytorch_model.bin


{'loss': 0.5091, 'learning_rate': 8.283839212469236e-06, 'epoch': 2.05}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-5500
Configuration saved in models/rubert_fine_tune/checkpoint-5500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-5500\pytorch_model.bin


{'loss': 0.4197, 'learning_rate': 7.41222313371616e-06, 'epoch': 2.26}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-6000
Configuration saved in models/rubert_fine_tune/checkpoint-6000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-6000\pytorch_model.bin


{'loss': 0.5004, 'learning_rate': 6.540607054963085e-06, 'epoch': 2.46}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-6500
Configuration saved in models/rubert_fine_tune/checkpoint-6500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-6500\pytorch_model.bin


{'loss': 0.463, 'learning_rate': 5.668990976210008e-06, 'epoch': 2.67}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-7000
Configuration saved in models/rubert_fine_tune/checkpoint-7000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-7000\pytorch_model.bin


{'loss': 0.4476, 'learning_rate': 4.797374897456932e-06, 'epoch': 2.87}


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 4


  0%|          | 0/813 [00:00<?, ?it/s]

{'eval_loss': 0.6077721118927002, 'eval_f1': 0.8503094205766969, 'eval_runtime': 57.6801, 'eval_samples_per_second': 56.345, 'eval_steps_per_second': 14.095, 'epoch': 3.0}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-7500
Configuration saved in models/rubert_fine_tune/checkpoint-7500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-7500\pytorch_model.bin


{'loss': 0.4466, 'learning_rate': 3.925758818703856e-06, 'epoch': 3.08}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-8000
Configuration saved in models/rubert_fine_tune/checkpoint-8000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-8000\pytorch_model.bin


{'loss': 0.4334, 'learning_rate': 3.0541427399507795e-06, 'epoch': 3.28}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-8500
Configuration saved in models/rubert_fine_tune/checkpoint-8500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-8500\pytorch_model.bin


{'loss': 0.4407, 'learning_rate': 2.1825266611977033e-06, 'epoch': 3.49}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-9000
Configuration saved in models/rubert_fine_tune/checkpoint-9000\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-9000\pytorch_model.bin


{'loss': 0.4702, 'learning_rate': 1.3109105824446267e-06, 'epoch': 3.69}


Saving model checkpoint to models/rubert_fine_tune/checkpoint-9500
Configuration saved in models/rubert_fine_tune/checkpoint-9500\config.json
Model weights saved in models/rubert_fine_tune/checkpoint-9500\pytorch_model.bin


{'loss': 0.4178, 'learning_rate': 4.3929450369155046e-07, 'epoch': 3.9}


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 4


  0%|          | 0/813 [00:00<?, ?it/s]



Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_loss': 0.6083604693412781, 'eval_f1': 0.8492370450553963, 'eval_runtime': 57.6077, 'eval_samples_per_second': 56.416, 'eval_steps_per_second': 14.113, 'epoch': 4.0}
{'train_runtime': 3123.7653, 'train_samples_per_second': 12.485, 'train_steps_per_second': 3.122, 'train_loss': 0.5023419119668261, 'epoch': 4.0}


TrainOutput(global_step=9752, training_loss=0.5023419119668261, metrics={'train_runtime': 3123.7653, 'train_samples_per_second': 12.485, 'train_steps_per_second': 3.122, 'train_loss': 0.5023419119668261, 'epoch': 4.0})

In [16]:
trainer.evaluate(test_dataset)


***** Running Evaluation *****
  Num examples = 3250
  Batch size = 4


  0%|          | 0/813 [00:00<?, ?it/s]

{'eval_loss': 0.6083604693412781,
 'eval_f1': 0.8492370450553963,
 'eval_runtime': 100.2571,
 'eval_samples_per_second': 32.417,
 'eval_steps_per_second': 8.109,
 'epoch': 4.0}

In [16]:
trainer.save_model("models/rubert_fine_tune/v2")

Saving model checkpoint to models/rubert_fine_tune/v1
Configuration saved in models/rubert_fine_tune/v1\config.json
Model weights saved in models/rubert_fine_tune/v1\pytorch_model.bin
