### Import

In [1]:
!pip install -q -U watermark

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
markdown 3.3.6 requires importlib-metadata>=4.4; python_version < "3.10", but you have importlib-metadata 2.1.3 which is incompatible.[0m


In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 5.2 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 44.8 MB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 43.6 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 32.9 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 2.7 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Fo

In [62]:
import transformers
from transformers import BertModel, AutoTokenizer, BertTokenizer, PreTrainedTokenizerFast, AdamW, get_linear_schedule_with_warmup
import torch.nn.functional as F

import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import Dataset, DataLoader

from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments
from transformers import BertForSequenceClassification, BertConfig

### Данные

In [4]:
!gdown --id 1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV
!gdown --id 1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv

Downloading...
From: https://drive.google.com/uc?id=1S6qMioqPJjyBLpLVz4gmRTnJHnjitnuV
To: /content/apps.csv
100% 134k/134k [00:00<00:00, 52.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1zdmewp7ayS4js4VtrJEHzAheSW-5NBZv
To: /content/reviews.csv
100% 7.17M/7.17M [00:00<00:00, 144MB/s]


In [7]:
df = pd.read_csv("reviews.csv")
def to_sentiment(rating):
  rating = int(rating)
  if rating <= 2:
    return 0
  elif rating == 3:
    return 1
  else: 
    return 2

df['sentiment'] = df.score.apply(to_sentiment)

In [8]:
class_names = ['negative', 'neutral', 'positive']

### Предобработка данных

In [9]:
PRE_TRAINED_MODEL_NAME = 'cointegrated/rubert-tiny-sentiment-balanced'

In [10]:
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

Downloading:   0%|          | 0.00/235k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/377 [00:00<?, ?B/s]

In [11]:
MAX_LEN = 160

In [12]:
class GPReviewDataset(Dataset):

  def __init__(self, reviews, targets, tokenizer, max_len):
    self.reviews = reviews
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len
  
  def __len__(self):
    return len(self.reviews)
  
  def __getitem__(self, item):
    review = str(self.reviews[item])
    target = self.targets[item]

    encoding = self.tokenizer.encode_plus(
      review,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      padding='max_length',
      return_attention_mask=True,
      return_tensors='pt',
      truncation=True
    )

    return {
      'review_text': review,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'labels': torch.tensor(target, dtype=torch.long) #targets
    }

In [13]:
RANDOM_SEED = 1
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [14]:
df_train, df_test = train_test_split(df, test_size=0.1, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_test, test_size=0.5, random_state=RANDOM_SEED)

In [16]:
train_dataset = GPReviewDataset(df_train.content.to_numpy(), df_train.sentiment.to_numpy(), tokenizer, MAX_LEN)
test_dataset = GPReviewDataset(df_test.content.to_numpy(), df_test.sentiment.to_numpy(), tokenizer, MAX_LEN)
test_dataset = GPReviewDataset(df_val.content.to_numpy(), df_val.sentiment.to_numpy(), tokenizer, MAX_LEN)

### Модель SentimentClassifier

In [42]:
class SentimentClassifier(nn.Module):

  def __init__(self, n_classes):
    super().__init__()
    self.n_classes = n_classes
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
  
  def forward(self, input_ids, attention_mask, labels):
    _, pooled_output = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False)
    logits = self.out(self.drop(pooled_output))
    output = (logits,)

    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))
    return ((loss,) + output)

In [43]:
model = SentimentClassifier(len(class_names))
model = model.to(device)

loading configuration file https://huggingface.co/cointegrated/rubert-tiny-sentiment-balanced/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/57f93109c036fe11e9ad81f652ae69595fd3bbd75be351143a3763f6048a9922.da90d84d661736cace212ae8531ab550f1c9f0938189c87cea6989b1ce8531d4
Model config BertConfig {
  "_name_or_path": "rubert-tiny-sentiment-balanced",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "emb_size": 312,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 312,
  "id2label": {
    "0": "negative",
    "1": "neutral",
    "2": "positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 600,
  "label2id": {
    "negative": 0,
    "neutral": 1,
    "positive": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "p

### Метрики

In [50]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

### Обучение 1

In [51]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=2,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=100,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [52]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,            # evaluation dataset
    compute_metrics = compute_metrics    # metrics to evaluate
)

trainer.train()

***** Running training *****
  Num examples = 14171
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3544


Step,Training Loss
100,0.6544
200,0.6371
300,0.5517
400,0.5324
500,0.5207
600,0.5182
700,0.4922
800,0.5284
900,0.4541
1000,0.4905


Saving model checkpoint to ./results/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3544, training_loss=0.4590912328078569, metrics={'train_runtime': 271.136, 'train_samples_per_second': 104.531, 'train_steps_per_second': 13.071, 'total_flos': 0.0, 'train_loss': 0.4590912328078569, 'epoch': 2.0})

### Метрики на тесте 1

In [53]:
trainer.evaluate(eval_dataset=test_dataset, metric_key_prefix="test")

***** Running Evaluation *****
  Num examples = 787
  Batch size = 16


{'epoch': 2.0,
 'test_accuracy': 0.7827191867852605,
 'test_f1': 0.7850911562125442,
 'test_loss': 0.6838757395744324,
 'test_precision': 0.7896895214982044,
 'test_recall': 0.7827191867852605,
 'test_runtime': 3.3156,
 'test_samples_per_second': 237.361,
 'test_steps_per_second': 15.08}

### Модель SentimentClassifier с cls

In [58]:
class SentimentClassifierCLS(nn.Module):

  def __init__(self, n_classes):
    super().__init__()
    self.n_classes = n_classes
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.bert.config.hidden_size*2, n_classes)
  
  def forward(self, input_ids, attention_mask, labels):
    _, pooled_output = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False)
    cls = _[:,0,:]
    dropped = self.drop(torch.hstack([cls, pooled_output]))
    logits = self.out(dropped)
    output = (logits,)
    
    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))
    return ((loss,) + output)

In [59]:
model = SentimentClassifierCLS(len(class_names))
model = model.to(device)

loading configuration file https://huggingface.co/cointegrated/rubert-tiny-sentiment-balanced/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/57f93109c036fe11e9ad81f652ae69595fd3bbd75be351143a3763f6048a9922.da90d84d661736cace212ae8531ab550f1c9f0938189c87cea6989b1ce8531d4
Model config BertConfig {
  "_name_or_path": "rubert-tiny-sentiment-balanced",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "emb_size": 312,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 312,
  "id2label": {
    "0": "negative",
    "1": "neutral",
    "2": "positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 600,
  "label2id": {
    "negative": 0,
    "neutral": 1,
    "positive": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "p

### Обучение 2

In [60]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=2,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=100,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [61]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,            # evaluation dataset
    compute_metrics = compute_metrics    # metrics to evaluate
)

trainer.train()

***** Running training *****
  Num examples = 14171
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3544


Step,Training Loss
100,1.1427
200,1.0242
300,0.9263
400,0.8879
500,0.8526
600,0.857
700,0.8013
800,0.8628
900,0.7997
1000,0.766


Saving model checkpoint to ./results/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3544, training_loss=0.7297594547271729, metrics={'train_runtime': 257.801, 'train_samples_per_second': 109.938, 'train_steps_per_second': 13.747, 'total_flos': 0.0, 'train_loss': 0.7297594547271729, 'epoch': 2.0})

### Метрики на тесте 2

In [63]:
trainer.evaluate(eval_dataset=test_dataset, metric_key_prefix="test")

***** Running Evaluation *****
  Num examples = 787
  Batch size = 16


{'epoch': 2.0,
 'test_accuracy': 0.7382465057179162,
 'test_f1': 0.7404419714143651,
 'test_loss': 0.6582275032997131,
 'test_precision': 0.7442522895843777,
 'test_recall': 0.7382465057179162,
 'test_runtime': 2.479,
 'test_samples_per_second': 317.471,
 'test_steps_per_second': 20.17}

### Модель BertForSequenceClassification

https://huggingface.co/docs/transformers/model_doc/bert

In [64]:
model = BertForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME)
model = model.to(device)
config = BertConfig.from_pretrained(PRE_TRAINED_MODEL_NAME)
config.num_labels = 3
model = BertForSequenceClassification(config) 

loading configuration file https://huggingface.co/cointegrated/rubert-tiny-sentiment-balanced/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/57f93109c036fe11e9ad81f652ae69595fd3bbd75be351143a3763f6048a9922.da90d84d661736cace212ae8531ab550f1c9f0938189c87cea6989b1ce8531d4
Model config BertConfig {
  "_name_or_path": "rubert-tiny-sentiment-balanced",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "emb_size": 312,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 312,
  "id2label": {
    "0": "negative",
    "1": "neutral",
    "2": "positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 600,
  "label2id": {
    "negative": 0,
    "neutral": 1,
    "positive": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "p

### Обучение 3

In [65]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=2,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=100,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [66]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,            # evaluation dataset
    compute_metrics = compute_metrics    # metrics to evaluate
)

trainer.train()

***** Running training *****
  Num examples = 14171
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3544


Step,Training Loss
100,1.103
200,1.1043
300,1.1013
400,1.1023
500,1.1011
600,1.0864
700,1.0401
800,0.9803
900,0.9242
1000,0.8578


Saving model checkpoint to ./results/checkpoint-500
Configuration saved in ./results/checkpoint-500/config.json
Model weights saved in ./results/checkpoint-500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-1000
Configuration saved in ./results/checkpoint-1000/config.json
Model weights saved in ./results/checkpoint-1000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-1500
Configuration saved in ./results/checkpoint-1500/config.json
Model weights saved in ./results/checkpoint-1500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2000
Configuration saved in ./results/checkpoint-2000/config.json
Model weights saved in ./results/checkpoint-2000/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-2500
Configuration saved in ./results/checkpoint-2500/config.json
Model weights saved in ./results/checkpoint-2500/pytorch_model.bin
Saving model checkpoint to ./results/checkpoint-3000
Configuration saved in ./results/checkpoint-3

TrainOutput(global_step=3544, training_loss=0.8393643036921998, metrics={'train_runtime': 247.3147, 'train_samples_per_second': 114.599, 'train_steps_per_second': 14.33, 'total_flos': 65320945614720.0, 'train_loss': 0.8393643036921998, 'epoch': 2.0})

### Метрики на тесте 3

In [67]:
trainer.evaluate(eval_dataset=test_dataset, metric_key_prefix="test")

***** Running Evaluation *****
  Num examples = 787
  Batch size = 16


{'epoch': 2.0,
 'test_accuracy': 0.6785260482846251,
 'test_f1': 0.6800540597256922,
 'test_loss': 0.7610854506492615,
 'test_precision': 0.6818606238108075,
 'test_recall': 0.6785260482846251,
 'test_runtime': 3.2481,
 'test_samples_per_second': 242.292,
 'test_steps_per_second': 15.393}

### Модель SentimentClassifier с аггрегированными cls

In [97]:
class SentimentClassifierCLSs(nn.Module):

  def __init__(self, n_classes):
    super().__init__()
    self.n_classes = n_classes
    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.3)
    self.out = nn.Linear(self.bert.config.hidden_size*2, n_classes)
  
  def forward(self, input_ids, attention_mask, labels):
    _, pooled_output, hidden_states = self.bert(
      input_ids=input_ids,
      attention_mask=attention_mask,
      return_dict=False,
      output_hidden_states=True)
    cls = torch.stack(hidden_states) # [4, 8, 160, 312]
    cls = cls[:,:,0,:] # [4, 8, 312]
    cls = cls.mean(axis=0) # [8, 312] batch size x hidden size
    dropped = self.drop(torch.hstack([cls, pooled_output]))
    logits = self.out(dropped)
    output = (logits,)
    
    loss_fn = nn.CrossEntropyLoss()
    loss = loss_fn(logits.view(-1, self.n_classes), labels.view(-1))
    return ((loss,) + output)

In [98]:
model = SentimentClassifierCLSs(len(class_names))
model = model.to(device)

loading configuration file https://huggingface.co/cointegrated/rubert-tiny-sentiment-balanced/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/57f93109c036fe11e9ad81f652ae69595fd3bbd75be351143a3763f6048a9922.da90d84d661736cace212ae8531ab550f1c9f0938189c87cea6989b1ce8531d4
Model config BertConfig {
  "_name_or_path": "rubert-tiny-sentiment-balanced",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "emb_size": 312,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 312,
  "id2label": {
    "0": "negative",
    "1": "neutral",
    "2": "positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 600,
  "label2id": {
    "negative": 0,
    "neutral": 1,
    "positive": 2
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 3,
  "p

### Обучение 4

In [99]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=2,              # total number of training epochs
    per_device_train_batch_size=8,  # batch size per device during training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=100,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [100]:
trainer = Trainer(
    model=model,                         # the instantiated 🤗 Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=val_dataset,            # evaluation dataset
    compute_metrics = compute_metrics    # metrics to evaluate
)

trainer.train()

***** Running training *****
  Num examples = 14171
  Num Epochs = 2
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3544


Step,Training Loss
100,1.0425
200,0.9781
300,0.8976
400,0.8812
500,0.8612
600,0.8634
700,0.8197
800,0.8441
900,0.7951
1000,0.7494


Saving model checkpoint to ./results/checkpoint-500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-1500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-2500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3000
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
Saving model checkpoint to ./results/checkpoint-3500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=3544, training_loss=0.7211605340040564, metrics={'train_runtime': 243.5365, 'train_samples_per_second': 116.377, 'train_steps_per_second': 14.552, 'total_flos': 0.0, 'train_loss': 0.7211605340040564, 'epoch': 2.0})

### Метрики на тесте 4

In [101]:
trainer.evaluate(eval_dataset=test_dataset, metric_key_prefix="test")

***** Running Evaluation *****
  Num examples = 787
  Batch size = 16


{'epoch': 2.0,
 'test_accuracy': 0.7344345616264295,
 'test_f1': 0.7379401393808378,
 'test_loss': 0.6616185903549194,
 'test_precision': 0.7440049259700108,
 'test_recall': 0.7344345616264295,
 'test_runtime': 4.3018,
 'test_samples_per_second': 182.948,
 'test_steps_per_second': 11.623}