## Finetuning NLI-deberta-v3 model
- This is based on Prof. Mihai Surdeanu's textbook <a href="https://github.com/clulab/gentlenlp/blob/main/notebooks/chap13_classification_bert.ipynb">Gentle NLP Chapter 13 Classification using BERT model</a>
- Modified for NLI evaluation and analysis over SICCK dataset
- Reference: <a href="https://huggingface.co/cross-encoder/nli-deberta-v3-base">HuggingFace nli-deberta-v3-base </a>

- Author: Sushma Anand Akoju, Email: sushmaakoju@arizona.edu

In [None]:
!pip install datasets
!pip install transformers
!pip install sentencepiece
!pip install accelerate
!pip install 'transformers[torch]'

# Text Classification Using Transformer Networks (Deberta and Roberta)

Some initialization:

In [2]:
import random
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# enable tqdm in pandas
tqdm.pandas()

# Flip to False to force CPU execution even when a GPU is present.
use_gpu = True

# Use CUDA only when it is both requested and actually available;
# otherwise fall back to the CPU.
if use_gpu and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'device: {device.type}')

# def set_seed(seed: int = 42) -> None:
#     np.random.seed(seed)
#     random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     # When running on the CuDNN backend, two further options must be set
#     # torch.backends.cudnn.deterministic = True
#     # torch.backends.cudnn.benchmark = False
#     # Set a fixed value for the hash seed
#     os.environ["PYTHONHASHSEED"] = str(seed)
#     print(f"Random seed set as {seed}")

# random seed
# seed = 12345

# set random seed
# if seed is not None:
#     print(f'random seed: {seed}')
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)

device: cuda


In [3]:
# Mount Google Drive inside the Colab runtime; the data and output paths used
# later in this notebook live under /content/drive/.
# force_remount=True is passed so an existing mount is replaced.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [4]:
import os
import pandas as pd
# path = "/content/drive/MyDrive/Colab Notebooks/natural-logic/final-datasets/block-wise-data/blocks-dec26/data/15/"
# assert os.path.exists(path), "Error"
# temp = pd.read_csv(os.path.join(path,"SICCK-zero-shot-analysis-apr24.csv"))
# df = temp[temp.columns[1:]]
# df.head(), len(df)

In [5]:
# Class ids for the `label` column. Forward (FE) and reverse (RE) entailment
# share one id, so there are three distinct classes.
# NOTE(review): test_eval() decodes prediction id 1 as "FE" and id 2 as
# Neutral/RE, which is not the encoding below -- confirm which encoding the
# `label` column in the fold workbooks actually uses.
label2id = dict(Contradiction=0, Neutral=1, FE=2, RE=2)

# Same classes with the id order reversed (presumably the label order of the
# RoBERTa checkpoint -- confirm against its model card).
label2id_roberta = dict(Contradiction=2, Neutral=1, FE=0, RE=0)
# df['label'] = df['CompressedGT'].map(lambda x: label2id[x])
# df['label4roberta'] = df['CompressedGT'].map(lambda x: label2id_roberta[x])

Read the train/dev/test datasets and create a HuggingFace `Dataset` object:

## Rolling window (circular array style) splits for 5-fold Cross validation
- to account for the few extra indices that might run beyond the length of data for test or train sets

In [6]:
# import math
# d = list(range(len(df)))
# n = len(d)
# test_len = math.ceil(1304/5)
# train_len = n-(2*test_len)
# val_len = n-train_len - test_len
# print(train_len, test_len, val_len)
# math.ceil(1304/5), n-(2*261),n, n-test_len

In [7]:
# len(d)

In [8]:
# def circular_array(starting_index, ending_index, d):
#   idx = d
#   idx = np.roll(idx, -starting_index)[:(len(idx)-starting_index+ending_index)%len(idx)]

#   return idx

In [9]:
# len(circular_array(261+test_len,261+test_len+1043, d))

In [10]:
# counter = 1
# folds = []
# columns = ['Premise', 'Hypothesis', 'label','label4roberta', 'CompressedGT', 'Modifier Type',
#            'Modifier',	'Premise/Hypothesis/Both',	'Part of Premise/Hypothesis Modified']
# # test_len = 261
# for i in range(0,n, test_len):
#   # print(i, len(d[i:i+test_len]), len(circular_array(i, i+test_len, d)))
#   test = df.iloc[circular_array(i, i+test_len, d)][columns]
#   train = df.iloc[circular_array(i+test_len, i+test_len+1043, d)][columns]
#   print(len(test), len(train))
#   # val = df.iloc[circular_array(i+test_len+1130, i+test_len+1130+test_len, d)][columns]
#   counter += 1
#   folds.append({"train":train, "test":test})

In [11]:
# len(folds[0]["train"]), len(folds[0]["test"])

### Save all data into one excel sheet

In [12]:
# output_path = "/content/drive/MyDrive/Colab Notebooks/natural-logic/june12"
# for i,fold in enumerate(folds):
#   with pd.ExcelWriter(os.path.join(output_path, "fold"+str(i)+".xlsx")) as writer:
#     fold["train"].to_excel(writer, sheet_name="train", index=False )
#     fold["test"].to_excel(writer, sheet_name="test", index=False )

In [13]:
# output_path = "/content/drive/MyDrive/Colab Notebooks/natural-logic/june12"
# with pd.ExcelWriter(os.path.join(output_path, "five_folds.xlsx")) as writer:

#   for i,fold in enumerate(folds):
#       # fold["train"].to_excel(writer, sheet_name="train", index=False )
#     fold["test"].to_excel(writer, sheet_name="fold"+str(i), index=False )

### Modifier type distribution in each test set

In [14]:
# with pd.ExcelWriter(os.path.join(output_path, "fold_distribution.xlsx")) as writer:
#   for i,fold in enumerate(folds):
#     print(i,fold["test"].groupby(["Modifier Type"]).count().reset_index()[["Modifier Type","CompressedGT"]])
#     fold["test"].groupby(["Modifier Type"]).count().reset_index()[["Modifier Type","CompressedGT"]].to_excel(writer, sheet_name="fold"+str(i), index=False)

### Label-wise distribution in test splits

In [15]:
# with pd.ExcelWriter(os.path.join(output_path, "fold_label_distribution.xlsx")) as writer:
#   for i,fold in enumerate(folds):
#     print(i,fold["test"].groupby(["CompressedGT"]).count().reset_index()[["CompressedGT", "Modifier Type"]])
#     fold["test"].groupby(["CompressedGT"]).count().reset_index()[["CompressedGT","Modifier Type"]].to_excel(writer, sheet_name="fold"+str(i), index=False)

### Label / modifier-type count distribution for analysis: to check for gaps or imbalance in the distribution

In [16]:
# with pd.ExcelWriter(os.path.join(output_path, "fold_label_qtype_distribution.xlsx")) as writer:
#   for i,fold in enumerate(folds):
#     print(i,fold["test"].groupby(["CompressedGT", "Modifier Type"]).count().reset_index()[["CompressedGT", "Modifier Type", "Modifier"]])
#     fold["test"].groupby(["CompressedGT", "Modifier Type"]).count().reset_index()[["CompressedGT","Modifier Type", "Modifier"]].to_excel(writer, sheet_name="fold"+str(i), index=False)

## Load data

In [None]:
# One workbook per cross-validation fold; each is read for its "train" and
# "test" sheets.
filenames = ["fold0.xlsx", "fold1.xlsx", "fold2.xlsx", "fold3.xlsx", "fold4.xlsx"]
path = "/content/drive/MyDrive/Colab Notebooks/natural-logic/june12/data"
output_path = "/content/drive/MyDrive/Colab Notebooks/natural-logic/june12/randomseed/june18th"
folds = []
columns = ['Premise', 'Hypothesis', 'label', 'CompressedGT', 'Modifier Type',
           'Modifier', 'Premise/Hypothesis/Both', 'Part of Premise/Hypothesis Modified']
for i, file in enumerate(filenames):
  # Parse the workbook once and pull both sheets out of the returned dict.
  sheets = pd.read_excel(os.path.join(path, file), sheet_name=["train", "test"])
  # The frames are deliberately NOT called `train`/`test`: a module-level
  # `train` would overwrite the train() function if this cell is re-run.
  # The renamed `labels` column is not listed in `columns`, so it is dropped
  # by the selection below.
  train_df = sheets["train"].rename(columns={"label4roberta": 'labels'})
  test_df = sheets["test"].rename(columns={"label4roberta": 'labels'})
  print(len(train_df), len(test_df))
  # .copy() gives each fold independent frames, so columns assigned to them
  # later do not raise SettingWithCopyWarning.
  folds.append({"train": train_df[columns].copy(), "test": test_df[columns].copy()})
  print(i)

In [18]:
len(folds)

5

In [19]:
# Sanity check: print the train and test row counts of every fold.
for fold in folds:
  print(len(fold["train"]), len(fold["test"]))

1043 261
1043 261
1043 261
1043 261
1043 261


## Create data splits with (premise, hypothesis) as well as (hypothesis, premise) inputs so that **Test** set predictions can be labeled as:
- Forward Entailment
- Reverse Entailment
- Neutral
- Contradiction

In [20]:
def read_data(data):
    """Build the model input text for each premise -> hypothesis pair.

    Adds a ``text`` column of the form ``"<Premise> [SEP] <Hypothesis>"`` in
    which every backslash is replaced by a space.

    Args:
        data: DataFrame with string ``Premise`` and ``Hypothesis`` columns.

    Returns:
        A copy of ``data`` with the extra ``text`` column. The frame passed in
        is left unchanged.
    """
    # Work on a copy: callers pass column slices of a fold frame, and writing
    # into such a slice in place triggers pandas' SettingWithCopyWarning.
    data = data.copy()
    data['text'] = data['Premise'] + " [SEP] " + data['Hypothesis']
    data['text'] = data['text'].str.replace('\\', ' ', regex=False)
    return data

In [21]:
def read_data_reverse(data):
    """Build the model input text for the reversed hypothesis -> premise pair.

    Adds a ``text`` column of the form ``"<Hypothesis> [SEP] <Premise>"`` in
    which every backslash is replaced by a space.

    Args:
        data: DataFrame with string ``Premise`` and ``Hypothesis`` columns.

    Returns:
        A copy of ``data`` with the extra ``text`` column. The frame passed in
        is left unchanged.
    """
    # Work on a copy: callers pass column slices of a fold frame, and writing
    # into such a slice in place triggers pandas' SettingWithCopyWarning.
    data = data.copy()
    data['text'] = data['Hypothesis'] + " [SEP] " + data['Premise']
    data['text'] = data['text'].str.replace('\\', ' ', regex=False)
    return data

### Compute metrics for validation and test

In [22]:
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

def compute_metrics(eval_pred):
    """Score one evaluation pass for the HuggingFace ``Trainer``.

    Args:
        eval_pred: object exposing ``label_ids`` (gold class ids) and
            ``predictions`` (per-class scores; the arg-max over the last axis
            is taken as the predicted class).

    Returns:
        Dict with ``accuracy`` and micro-averaged ``recall``, ``f1`` and
        ``precision``.
    """
    gold = eval_pred.label_ids
    predicted = np.argmax(eval_pred.predictions, axis=-1)
    scores = {'accuracy': accuracy_score(gold, predicted)}
    scores['recall'] = recall_score(gold, predicted, average='micro')
    scores['f1'] = f1_score(gold, predicted, average='micro')
    scores['precision'] = precision_score(gold, predicted, average='micro')
    return scores
def compute_test_metrics(y_true, y_pred):
    """Score already-decoded test predictions.

    Args:
        y_true: gold class ids.
        y_pred: predicted class ids.

    Returns:
        Dict with ``accuracy`` and micro-averaged ``recall``, ``f1`` and
        ``precision``.
    """
    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    scores['recall'] = recall_score(y_true, y_pred, average='micro')
    scores['f1'] = f1_score(y_true, y_pred, average='micro')
    scores['precision'] = precision_score(y_true, y_pred, average='micro')
    return scores

### To include FE, RE and Neutral label calculation and scores for **Test**

In [23]:
from sklearn.metrics import classification_report
def test_eval(trainer, ds, fold, model_name):
  """Predict on a fold's test split in both directions and score the result.

  The forward split (premise -> hypothesis) gives the class ids that are
  scored. The reversed split (hypothesis -> premise) is consulted only to
  tell reverse entailment apart from neutral when building the string labels.

  Args:
    trainer: fitted HuggingFace ``Trainer`` used for ``predict``.
    ds: ``DatasetDict`` with ``test`` and ``rev_test`` splits that still carry
      the ``Premise``, ``Hypothesis`` and ``text`` columns.
    fold: fold identifier, only recorded in the result dict.
    model_name: model identifier, only recorded in the result dict.

  Returns:
    Tuple ``(y_true, y_preds, res, labels)``: gold ids, predicted ids from the
    forward pass, the metrics dict (plus ``fold`` and ``model_name``) and the
    decoded string label per test example.
  """
  drop_cols = ['Premise', 'Hypothesis', 'text']
  test_ds = ds['test'].map(tokenize, batched=True, remove_columns=drop_cols)
  rev_test_ds = ds['rev_test'].map(tokenize, batched=True, remove_columns=drop_cols)

  output = trainer.predict(test_ds)
  rev_scores = trainer.predict(rev_test_ds)

  y_true = output.label_ids
  y_preds = np.argmax(output.predictions, axis=-1)
  y_rev_score_preds = np.argmax(rev_scores.predictions, axis=-1)

  # Decode ids as 0 -> Contradiction, 1 -> FE; any other id becomes RE when
  # the reversed pair is predicted as 1, and Neutral otherwise.
  # NOTE(review): the `label2id` dict defined in this notebook maps Neutral to
  # 1 and FE/RE to 2, which is not the decoding used here -- confirm which
  # encoding the `label` column the model is trained on actually uses.
  labels = []
  for fwd, rev in zip(y_preds, y_rev_score_preds):
    if fwd == 1:
      labels.append("FE")
    elif fwd == 0:
      labels.append("Contradiction")
    elif rev == 1:
      labels.append("RE")
    else:
      labels.append("Neutral")

  print(classification_report(y_true, y_preds, labels=[0, 1, 2]))
  res = compute_test_metrics(y_true, y_preds)
  res['fold'] = fold
  res['model_name'] = model_name
  return y_true, y_preds, res, labels

In [24]:
# model = AutoModelForSequenceClassification.from_pretrained('cross-encoder/nli-deberta-v3-base', num_labels=3)
# tokenizer = AutoTokenizer.from_pretrained('cross-encoder/nli-deberta-v3-base')

### Get this_train, this_validation & this_test set from a this_fold

In [25]:
def get_dataset(fold, model_name):
  """Turn one fold into a ``DatasetDict`` with train/validation/test splits.

  Args:
    fold: dict with ``"train"`` and ``"test"`` DataFrames that contain the
      ``Premise``, ``Hypothesis`` and ``label`` columns.
    model_name: currently unused; kept so existing callers keep working.

  Returns:
    Tuple ``(ds, test_df, rev_test_df)``. ``ds`` has the splits ``train``,
    ``validation``, ``test`` and ``rev_test`` (the test pairs with premise and
    hypothesis swapped); the two DataFrames are the test frames including
    their ``text`` column.
  """
  columns = ['Premise', 'Hypothesis', 'label']

  # Hand independent copies to the text builders so a `text` column is never
  # written into a slice of the fold's own frames.
  train_df = read_data(fold["train"][columns].copy())
  test_df = read_data(fold["test"][columns].copy())
  rev_test_df = read_data_reverse(fold["test"][columns].copy())
  print(test_df.columns)

  # 90/10 train/validation split. No random_state is passed, so the split
  # follows the global NumPy RNG state at call time.
  train_df, eval_df = train_test_split(train_df, train_size=0.9)
  for frame in (train_df, eval_df, test_df, rev_test_df):
    frame.reset_index(inplace=True, drop=True)

  print(f'train rows: {len(train_df.index):,}')
  print(f'eval rows: {len(eval_df.index):,}')
  print(f'test rows: {len(test_df.index):,}')
  # Previously this line was also labelled "test rows".
  print(f'rev_test rows: {len(rev_test_df.index):,}')

  ds = DatasetDict()
  ds['train'] = Dataset.from_pandas(train_df)
  ds['validation'] = Dataset.from_pandas(eval_df)
  ds['test'] = Dataset.from_pandas(test_df)
  ds['rev_test'] = Dataset.from_pandas(rev_test_df)

  print(ds)
  return ds, test_df, rev_test_df

### CustomTrainer for CrossEntropyLoss but we train for both custom and default Trainer classes in HuggingFace
- Note: we did not see any difference between the two

In [26]:

import torch
from torch import nn
from transformers import Trainer
from accelerate import Accelerator

class CustomTrainer(Trainer):
    """``Trainer`` variant that uses a class-weighted cross-entropy loss."""

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """Compute weighted cross-entropy over the model's logits.

        Args:
            model: the model being trained or evaluated.
            inputs: batch dict; must contain ``labels``.
            return_outputs: when true, also return the model outputs.
            **kwargs: ignored. Accepts extra keyword arguments (such as
                ``num_items_in_batch``) that newer transformers releases pass
                to ``compute_loss``.

        Returns:
            The loss tensor, or ``(loss, outputs)`` if ``return_outputs``.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Per-class weights for the three labels. They are created on the same
        # device as the logits, so this also works on CPU or a non-default GPU
        # instead of assuming 'cuda'.
        class_weights = torch.tensor([1.0, 2.0, 3.0], device=logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

## Fine tune "cross-encoder/nli-deberta-v3-base" for 1304 examples for five folds.
- Use rolling window for train-test folds
- split validation from training (10% of each training fold, via `train_test_split(train_size=0.9)`)
- test size: 261

### Tokenize & Train one model at a time for all folds

In [27]:
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer, set_seed
import time

model_name = ""
model_names =["cross-encoder/nli-deberta-v3-base",	"ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"]
# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
# tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(examples):
    """Tokenize a batch's ``text`` column with the module-level ``tokenizer``.

    Truncation is enabled. ``tokenizer`` must be assigned at module level
    before this function is called.
    """
    texts = examples['text']
    return tokenizer(texts, truncation=True)

def train(model_name, this_path, folds, seed):
  """Fine-tune and test `model_name` for every (epochs, batch size, fold) combination.

  For each of the epoch counts [4, 8], batch sizes [8, 16, 32] and each fold,
  a fresh pretrained model is loaded, trained with the HuggingFace `Trainer`,
  evaluated with `test_eval`, and the fold's test frame (with predictions
  added) is written to `this_path`.

  Relies on the module-level `tokenizer` (passed to the Trainer, and used by
  `tokenize`) and `output_path` (parent of the Trainer output directory).

  Args:
    model_name: hub id of the form "<org>/<name>"; the part after "/" is used
      in directory and file names.
    this_path: directory that receives the per-run prediction files.
    folds: list of dicts with "train" and "test" DataFrames. The "test" frames
      are modified in place (columns label, predictions, text, pred_labels).
    seed: seed passed to `set_seed` and `TrainingArguments`.

  Returns:
    List with one metrics dict (from `test_eval`) per run.
  """
  epochs = [4, 8]
  batch_sizes = [8,16,32]
  # Short model tag: the hub id without its organisation prefix.
  m = model_name.split("/")[1]
  all_scores = []
  # tokenizer = AutoTokenizer.from_pretrained(model_name)
  # model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
  # tokenizer = AutoTokenizer.from_pretrained(model_name)
  for num_epochs in epochs:
    for batch_size in batch_sizes:

      for i,fold in enumerate(folds):
          print("\n***********************************************************************************\n")
          print("\n**************** The number of epochs, batch_size and fold respectively are: ",num_epochs, batch_size, i,"************************\n")
          # Pause 60 s before every run (presumably to let the GPU settle
          # between runs -- TODO confirm this is still needed).
          time.sleep(60)
          torch.cuda.empty_cache()
          #this set_seed is imported from transformers
          # Re-seed before each run, ahead of the model load and of
          # get_dataset(), whose train/validation split passes no random_state.
          set_seed(seed)
          #Load this pretrained model
          model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

          ds, test_df, rev_test_df = get_dataset(fold,model_name)
          # Tokenize and drop the raw text columns so only model inputs and
          # the label remain.
          train_ds = ds['train'].map(
            tokenize, batched=True,
            remove_columns=['Premise', 'Hypothesis', 'text'],
          )
          eval_ds = ds['validation'].map(
              tokenize,
              batched=True,
              remove_columns=['Premise', 'Hypothesis', 'text'],
          )

          weight_decay = 0.01
          # NOTE: tx_model_name is not used anywhere below.
          tx_model_name = f'{model_name}-sequence-classification'

          # The output directory depends only on model tag, epochs and batch
          # size, so all folds and seeds of one configuration share it.
          training_args = TrainingArguments(
              output_dir=os.path.join(output_path,m+"_"+str(num_epochs)+str(batch_size)+"trainer"),
              log_level='error',
              num_train_epochs=num_epochs,
              per_device_train_batch_size=batch_size,
              per_device_eval_batch_size=batch_size,
              evaluation_strategy='epoch',
              weight_decay=weight_decay, seed = seed,
          )
          trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_ds,
            eval_dataset=eval_ds,
            compute_metrics=compute_metrics,
            tokenizer=tokenizer,
          )
          trainer.train()

          # customTrainer  = CustomTrainer(
          #   model=model,
          #   args=training_args,
          #   train_dataset=train_ds,
          #   eval_dataset=eval_ds,
          #   compute_metrics=compute_metrics,
          #   tokenizer=tokenizer,
          # )

          # customTrainer.train()
          y_true, y_pred, results, labels = test_eval(trainer, ds, i, model_name )
          # y_true1, y_pred1, results1, labels1 = test_eval(customTrainer, ds, i, model_name )

          all_scores.append(results)
          # These columns are written into the fold's own test frame and are
          # overwritten by the next run on the same fold.
          fold["test"]["label"]= y_true
          fold["test"]["predictions"] = y_pred
          # fold["test"]["predictions2"] = y_pred1
          fold["test"]["text"] = test_df['text']
          fold["test"]["pred_labels"] =  labels
          # NOTE: the file name ends in .xlsx but the content is CSV (to_csv).
          filename = "five_"+m+"_"+str(num_epochs)+"_"+str(batch_size)+"_"+str(i)+"_"+str(seed)+"_test.xlsx"
          fold["test"].to_csv(os.path.join(this_path, filename))
  return all_scores

In [28]:
torch.cuda.get_device_name(0)

'NVIDIA A100-SXM4-40GB'

In [29]:
# if tokenizer:
#   del tokenizer
# if model:
#   del model

### "cross-encoder/nli-deberta-v3-base"

In [None]:
from accelerate import Accelerator
# Driver: fine-tune the first model in `model_names` once per seed and collect
# the per-run metrics returned by train().
all_scores = []
predictions = []
# if tokenizer:
#   del tokenizer
# if model:
#   del model
#for model_name in model_names:
model_name = model_names[0]
m = model_name.split("/")[1]
this_path = os.path.join(output_path, m)
# Create the result directory (including missing parents). This is a no-op if
# it already exists and raises OSError if it cannot be created.
os.makedirs(this_path, exist_ok=True)

# The tokenizer depends only on the model name, so it is loaded once instead
# of once per seed. train() and tokenize() read it as a module-level name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

seeds  = [12345, 34567, 56789, 98765, 76543]
for seed in seeds:
  torch.cuda.empty_cache()
  set_seed(seed)
  # model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
  all_scores.append(train(model_name, this_path, folds, seed))

In [31]:
# Keep the DeBERTa results under their own name. This binds the same list
# object as `all_scores`; it is not a copy.
all_scores_deberta = all_scores

In [32]:
import pandas as pd
# import pycm
import matplotlib.pyplot as plt
import os
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, classification_report
import altair as alt

In [33]:
def get_classification_report(ytrue, ypred, model_name, mtype="", svo=""):
  """Print sklearn's classification report and return its summary scores.

  Args:
    ytrue: gold labels.
    ypred: predicted labels.
    model_name: stored under ``model_name`` in the result.
    mtype: stored under ``modifier_type`` in the result.
    svo: stored under ``svo`` in the result.

  Returns:
    Dict with the three identifying fields above plus the macro-averaged
    F1, precision and recall and the overall accuracy.
  """
  report = classification_report(ytrue, ypred, output_dict=True)
  print(classification_report(ytrue, ypred))
  macro = report['macro avg']
  summary = {"model_name": model_name, "modifier_type": mtype, "svo": svo}
  summary["f1_macro"] = macro['f1-score']
  summary["precision_m"] = macro['precision']
  summary["recall_m"] = macro['recall']
  summary["acc"] = report['accuracy']
  return summary

In [34]:
# Column names used by the analysis cells: gold label and decoded prediction,
# then the two columns the scores are broken down by.
gt, pred = 'CompressedGT', 'pred_labels'
mod_type_col = 'Modifier Type'
svo = "Part of Premise/Hypothesis Modified"

In [35]:
model_folders = ["nli-deberta-v3-base", "roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"]
# Full paths of every entry in the first model's result folder, in the order
# os.listdir returns them.
results_dir = os.path.join(output_path, model_folders[0])
files = [os.path.join(results_dir, entry) for entry in os.listdir(results_dir)]

In [36]:
len(files)

150

In [37]:
os.path.basename(files[0]),os.path.basename(files[0])[25:].split("_")[0:4], os.path.basename(files[39])[25:].split("_")[0:4]

('five_nli-deberta-v3-base_4_8_0_12345_test.xlsx',
 ['4', '8', '0', '12345'],
 ['4', '16', '4', '34567'])

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
model_names = ['nli-deberta-v3-base',
       'ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli',
       'pair-classification-decomposable-attention-elmo']

finetuned_models_modtype_scores = []
# folds = {}
# Per-run prediction frames grouped by (epochs, batch_size, seed, model_name);
# each list holds one frame per fold file found.
# NOTE: the name `all` shadows the builtin; it is kept because the scoring
# cell reads this dict under that name.
all = {}
for file in files:
  basename = os.path.basename(file)
  model_name = ""
  if "deberta" in basename:
    model_name = model_names[0]
  if "roberta" in basename:
    model_name = model_names[1]
  #print(files)
  print(file)

  # The result files are CSV even though their names end in .xlsx.
  df = pd.read_csv(file)
  df[pred] = df[pred].replace("Negation", "Contradiction")
  df[svo] = df[svo].str.lower().str.strip()

  # File names look like five_<model>_<epochs>_<batch>_<fold>_<seed>_test.xlsx.
  # Read the four run fields from the right-hand end so the parse does not
  # depend on the length of the model name or on underscores inside it.
  epochs, batch_size, fold, seed = basename.split("_")[-5:-1]

  print(epochs, batch_size, fold, seed )
  all.setdefault((epochs, batch_size, seed, model_name), []).append(df)

In [None]:
# For every run configuration, pool the per-fold prediction frames and score
# them overall, per modifier type and per modified sentence part.
overall_scores = []
modifier_type_scores = []
svo_scores = []
for key, frames in all.items():
  # Keys are (epochs, batch_size, seed, model_name) tuples.
  num_epochs, batch_size, seed, run_model = key
  run_info = {"num_epochs": num_epochs, "batch_size": batch_size,
              "model_name": run_model, "seed": seed}
  df = pd.concat(list(frames), axis=0)

  for modifier_type in df[mod_type_col].unique():
    subset = df[df[mod_type_col] == modifier_type]
    res = get_classification_report(
        subset[gt].to_list(), subset[pred].to_list(), run_model, modifier_type, "")
    res.update(run_info)
    modifier_type_scores.append(res)

  for svo_type in df[svo].unique():
    subset = df[df[svo] == svo_type]
    res2 = get_classification_report(
        subset[gt].to_list(), subset[pred].to_list(), run_model, "", svo_type)
    res2.update(run_info)
    svo_scores.append(res2)

  res1 = get_classification_report(df[gt], df[pred], run_model, "", "")
  res1.update(run_info)
  overall_scores.append(res1)

In [40]:
# Write each score table to its own workbook under output_path.
for records, filename in (
    (overall_scores, "overall_scores_deberta.xlsx"),
    (svo_scores, "overall_svo_scores_deberta.xlsx"),
    (modifier_type_scores, "overall_modifier_scores_deberta.xlsx"),
):
  pd.DataFrame.from_records(records).to_excel(os.path.join(output_path, filename))

### "ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli"

Tokenize the texts:

In [41]:
# from transformers.models.deberta.modeling_deberta import DebertaModel, DebertaPreTrainedModel

In [42]:
# from transformers import DebertaForSequenceClassification
# model = DebertaForSequenceClassification.from_pretrained(models[0])
# model.train()

Create the transformer model:

In [43]:
# from torch import nn
# from transformers.modeling_outputs import SequenceClassifierOutput

# from transformers.models.bert.modeling_bert import BertModel, BertPreTrainedModel

# #BERT, SentenceTransformer pretrained model
# # https://github.com/huggingface/transformers/blob/65659a29cf5a079842e61a63d57fa24474288998/src/transformers/models/bert/modeling_bert.py#L1486

# class BertForSequenceClassification(BertPreTrainedModel):
#     def __init__(self, config):
#         super().__init__(config)
#         self.num_labels = config.num_labels
#         self.bert = BertModel(config)
#         self.dropout = nn.Dropout(config.hidden_dropout_prob)
#         self.classifier = nn.Linear(config.hidden_size, config.num_labels)
#         self.init_weights()

#     def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, labels=None, **kwargs):
#         outputs = self.bert(
#             input_ids,
#             attention_mask=attention_mask,
#             token_type_ids=token_type_ids,
#             **kwargs,
#         )
#         cls_outputs = outputs.last_hidden_state[:, 0, :]
#         cls_outputs = self.dropout(cls_outputs)
#         logits = self.classifier(cls_outputs)
#         loss = None
#         if labels is not None:
#             loss_fn = nn.CrossEntropyLoss()
#             loss = loss_fn(logits, labels)
#         return SequenceClassifierOutput(
#             loss=loss,
#             logits=logits,
#             hidden_states=outputs.hidden_states,
#             attentions=outputs.attentions,
#         )

In [44]:
# sliding window folds
# 15
# 10%
# 130 val
# 130 test
# 1000 train

# 1304 indices
# window size 130

# for fold in folds:
#   train, test, val = fold


In [45]:
# from transformers import AutoConfig

# config = AutoConfig.from_pretrained(
#     transformer_name,
#     num_labels=len(labels),
# )

# model = (
#     BertForSequenceClassification
#     .from_pretrained(transformer_name, config=config)
# )

Create the trainer object and train:

In [46]:
# from sklearn.metrics import accuracy_score

# def compute_metrics(eval_pred):
#     y_true = eval_pred.label_ids
#     y_pred = np.argmax(eval_pred.predictions, axis=-1)
#     return {'accuracy': accuracy_score(y_true, y_pred)}