In [3]:
!pip install torcheval

Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.2/179.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torcheval
Successfully installed torcheval-0.0.7


In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import DistilBertTokenizer, DistilBertModel
#from datasets import Dataset

from torch.utils.data import Dataset, DataLoader

from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm
import pandas as pd

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
torch.random.manual_seed(0)

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertModel.from_pretrained("distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [6]:
model

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): MultiHeadSelfAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): Li

In [7]:
def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp
n1, n2 = get_n_params(model.embeddings), get_n_params(model.transformer)
n1, n2, n1+n2

(23835648, 42527232, 66362880)

In [8]:
train_test_data = pd.read_csv('/kaggle/input/human-vs-qwen25-n-phi3/train_test_data.csv')

In [9]:
import os
import random
import numpy as np


def enable_determinism():
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    torch.use_deterministic_algorithms(True)

def fix_seeds(seed: int):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.mps.manual_seed(seed)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

enable_determinism()
fix_seeds(0)

In [10]:
# Класс датасета
class AnswersDataset(Dataset):
  def __init__(self, tokenizer, data_df, sampletype,  max_len=512):
    self.raw_data = data_df[data_df['sample_type']==sampletype]

    self.max_len = max_len
    self.tokenizer = tokenizer
    self.inputs_q = []
    self.inputs_a = []
    self.masks_q = []
    self.masks_a = []
    self.targets = []

    self.class_mapper = {'Human': 0, 'Phi3-mini': 1, 'Qwen25': 2}

    self.class_mapper_inv = {v: k for k, v in self.class_mapper.items()}

    self._build()


  def __len__(self):
    return len(self.inputs_a)

  def __getitem__(self, index):
    question_ids = self.inputs_q[index].squeeze()
    answers_ids = self.inputs_a[index].squeeze()

    question_mask = self.masks_q[index].squeeze()
    answer_mask = self.masks_a[index].squeeze()

    target_ids = self.targets[index]

    #src_mask    = self.inputs[index]["attention_mask"].squeeze()  # might need to squeeze
    #target_mask = self.targets[index]["attention_mask"].squeeze()  # might need to squeeze

    return  question_ids, answers_ids, target_ids, question_mask, answer_mask
    #{"question_ids": question_ids, "answers_ids": answers_ids, "target_ids": target_ids}

  def _build(self):
    self._buil_examples_from_files()

  def _buil_examples_from_files(self):
    # REPLACE_NO_SPACE = re.compile("[.;:!\'?,\"()]")
    # REPLACE_WITH_SPACE = re.compile("()|(\-)|(\/)")

    for i, row in tqdm(self.raw_data.iterrows(), total=self.raw_data.shape[0]):

      if pd.isna(row['Answers']):
        continue

      text_question = row['Question']
      text_answer = row['Answers']

      line_question = text_question.strip()
      line_answer = text_answer.strip()


      # line = REPLACE_NO_SPACE.sub("", line)
      # line = REPLACE_WITH_SPACE.sub("", line)
      # line = line + ' '

      target = self.class_mapper[row['Author']]

       # tokenize inputs
      tokenized_questions, q_mask = [v for k, v in self.tokenizer.batch_encode_plus(
          [line_question], max_length=self.max_len, padding='max_length', return_tensors="pt",
          truncation=True
      ).items()]
      tokenized_answers, a_mask = [v for k, v in self.tokenizer.batch_encode_plus(
          [line_answer], max_length=self.max_len, padding='max_length', return_tensors="pt",
          truncation=True
      ).items()]

       # tokenize targets


      self.inputs_q.append(tokenized_questions)
      self.inputs_a.append(tokenized_answers)
      self.masks_q.append(q_mask)
      self.masks_a.append(a_mask)
      self.targets.append(target)

In [11]:
max_seq_length = 128 # with 256 one epoch with 2 evaluations take 2 hours together
train_dataset = AnswersDataset(tokenizer, train_test_data, 'train', max_len=max_seq_length)
#test_dataset = AnswersDataset(tokenizer, train_test_data, 'test', max_len=max_seq_length)
val_dataset = AnswersDataset(tokenizer, train_test_data, 'val', max_len=max_seq_length)

100%|██████████| 67699/67699 [03:40<00:00, 307.37it/s]
100%|██████████| 21233/21233 [01:10<00:00, 299.70it/s]


In [12]:
g = torch.Generator()
batch_size=128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=g,
                          pin_memory=True, num_workers=2, worker_init_fn=seed_worker)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        pin_memory=True, num_workers=2, worker_init_fn=seed_worker)
#test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [13]:
from tqdm import tqdm
import torch.nn as nn
class DistilBERTClf_FCtuned_v10(nn.Module):
    def __init__(self, max_seq_length=512):
        super(DistilBERTClf_FCtuned_v10, self).__init__()
        self.dbmodel = model
        # v1 one head of additional attention
        # v2 two heads
        # v3 three heads
        # v4 one head, no dropout in the foremost attention
        # v5 GELU between FC and the attention
        # v6 one head, 0.2 dropout in the foremost attention
        # v7 ELU between FC and the attention (and 0.2 dropout)
        # v8 combination of 2 best (in precision and recall OvR): +2 heads and GELU
        # v9 ELU between FC and the attention
        self.model_out_features =  self.dbmodel.transformer.layer[-1].ffn.lin2.out_features
        self.attention_layer = nn.MultiheadAttention(embed_dim=self.model_out_features,
                                                     num_heads=3, dropout=0.1, batch_first=True)
        self.length=max_seq_length
        self.fc = nn.Linear(in_features=self.length*self.model_out_features, out_features = 3)
        self.activation_inner = nn.GELU()
        self.activation = nn.Softmax(dim=1)
        self.freeze_layers()


    def freeze_layers(self):
      for param in self.dbmodel.parameters():
        param.requires_grad = False



    def forward(self, qx, ax, qmask, amask):
        #qout = self.dbmodel(input_ids=qx, attention_mask=qmask).last_hidden_state
        aout = self.dbmodel(input_ids=ax, attention_mask=amask).last_hidden_state
        out, out_weights = self.attention_layer(aout, aout, aout)
        out = out.reshape(out.shape[0], -1) 
        out = self.activation_inner(out)
        pred = self.fc(out)

        return self.activation(pred)

In [14]:
from torcheval.metrics.functional import multiclass_confusion_matrix as conf_matrix

def evaluation_epoch(model, loader, loss_obj):
  n_correct, n_total = 0, 0
  n_correct_oo = 0
  total_loss = 0

  total_conf_matrix = torch.zeros(size=(3, 3))

  with torch.no_grad():
    for chunk in tqdm(loader):
      qx, ax, y, qmask, amask = chunk
      pred = model(qx.to(device), ax.to(device), qmask.to(device), amask.to(device)).cpu()
      apred = torch.argmax(pred, 1) # for all-vs-all classification

      oopred = torch.where(apred > 0, 1, 0) # for one-vs-others classification
      ooy = torch.where(y > 0, 1, 0)

      n_correct += (apred == y).sum()
      n_correct_oo += (oopred == ooy).sum()

      n_total += y.shape[0]

      total_loss += y.shape[0] * loss_obj(pred, y).item()
      total_conf_matrix+=conf_matrix(apred, y, num_classes=3)

  return {'accuracy_ava': n_correct/n_total, 'loss': total_loss/n_total,
          'accuracy_ovo':n_correct_oo/n_total, 'conf_matrix': total_conf_matrix}

def train_neural_net(model, train_loader, test_loader):
  loss = nn.CrossEntropyLoss()
  optimizer=torch.optim.Adam([{'params': model.attention_layer.parameters()},
                              {'params': model.fc.parameters()}], lr=1e-4)
  n_epochs=8
  loss_train_history = [] # логируется всегда

  # логируются каждую эпоху

  train_epoch_evals = []
  test_epoch_evals = []

  for _ in range(n_epochs):
    i=0
    model.train().to(device)
    for train_chunk in tqdm(train_loader):
        qx, ax, y, qmask, amask = train_chunk
        optimizer.zero_grad(set_to_none=True)


        pred = model(qx.to(device), ax.to(device), qmask.to(device), amask.to(device))
        loss_val = loss(pred, y.to(device)) #.long()
        loss_val.backward()

        loss_val_item = loss_val.detach().cpu().item()

        optimizer.step()
        loss_train_history.append(loss_val_item)
        i+=1
        if i % 100 == 0:
          print(f'train step {i}: train loss = {loss_val_item :.3f}')
        #break

    model.eval()
    #model.cpu()

    #train
    print('train evaluation')
    train_eval = evaluation_epoch(model, train_loader, loss)
    print(f"epoch {_}: train ava accuracy = {train_eval['accuracy_ava'] :.3f}")
    print(f"epoch {_}: train loss = {train_eval['loss'] :.3f}")
    print(f"epoch {_}: train ovo accuracy = {train_eval['accuracy_ovo'] :.3f}")
    train_epoch_evals.append(train_eval)

    #test
    print('test evaluation')
    test_eval = evaluation_epoch(model, test_loader, loss)
    print(f"epoch {_}: test ava accuracy = {test_eval['accuracy_ava'] :.3f}")
    print(f"epoch {_}: test loss = {test_eval['loss'] :.3f}")
    print(f"epoch {_}: test ovo accuracy = {test_eval['accuracy_ovo'] :.3f}")
    test_epoch_evals.append(test_eval)

  return {'training_loss_history': loss_train_history,
          'train_epochs_res': train_epoch_evals,
          'test_epochs_res': test_epoch_evals
          }

In [15]:
enable_determinism()
fix_seeds(0)

model1 = DistilBERTClf_FCtuned_v10(max_seq_length) 
train_hist = train_neural_net(model1, train_loader, val_loader)

  self.pid = os.fork()
 19%|█▉        | 100/529 [00:51<03:39,  1.96it/s]

train step 100: train loss = 0.840


 38%|███▊      | 200/529 [01:44<03:00,  1.82it/s]

train step 200: train loss = 0.779


 57%|█████▋    | 300/529 [02:40<02:06,  1.82it/s]

train step 300: train loss = 0.836


 76%|███████▌  | 400/529 [03:35<01:12,  1.78it/s]

train step 400: train loss = 0.722


 95%|█████████▍| 500/529 [04:31<00:16,  1.81it/s]

train step 500: train loss = 0.744


100%|██████████| 529/529 [04:47<00:00,  1.84it/s]


train evaluation


100%|██████████| 529/529 [04:22<00:00,  2.01it/s]


epoch 0: train ava accuracy = 0.777
epoch 0: train loss = 0.767
epoch 0: train ovo accuracy = 0.905
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.01it/s]


epoch 0: test ava accuracy = 0.749
epoch 0: test loss = 0.794
epoch 0: test ovo accuracy = 0.903


 19%|█▉        | 100/529 [00:55<03:57,  1.81it/s]

train step 100: train loss = 0.724


 38%|███▊      | 200/529 [01:51<03:02,  1.81it/s]

train step 200: train loss = 0.763


 57%|█████▋    | 300/529 [02:46<02:07,  1.80it/s]

train step 300: train loss = 0.735


 76%|███████▌  | 400/529 [03:42<01:11,  1.79it/s]

train step 400: train loss = 0.753


 95%|█████████▍| 500/529 [04:37<00:16,  1.80it/s]

train step 500: train loss = 0.797


100%|██████████| 529/529 [04:54<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:22<00:00,  2.01it/s]


epoch 1: train ava accuracy = 0.810
epoch 1: train loss = 0.737
epoch 1: train ovo accuracy = 0.917
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.02it/s]


epoch 1: test ava accuracy = 0.787
epoch 1: test loss = 0.757
epoch 1: test ovo accuracy = 0.917


 19%|█▉        | 100/529 [00:55<03:58,  1.80it/s]

train step 100: train loss = 0.737


 38%|███▊      | 200/529 [01:51<03:03,  1.79it/s]

train step 200: train loss = 0.686


 57%|█████▋    | 300/529 [02:46<02:07,  1.80it/s]

train step 300: train loss = 0.719


 76%|███████▌  | 400/529 [03:42<01:11,  1.80it/s]

train step 400: train loss = 0.741


 95%|█████████▍| 500/529 [04:38<00:16,  1.79it/s]

train step 500: train loss = 0.744


100%|██████████| 529/529 [04:54<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:23<00:00,  2.01it/s]


epoch 2: train ava accuracy = 0.832
epoch 2: train loss = 0.715
epoch 2: train ovo accuracy = 0.925
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.01it/s]


epoch 2: test ava accuracy = 0.815
epoch 2: test loss = 0.731
epoch 2: test ovo accuracy = 0.923


 19%|█▉        | 100/529 [00:55<03:58,  1.80it/s]

train step 100: train loss = 0.743


 38%|███▊      | 200/529 [01:51<03:02,  1.80it/s]

train step 200: train loss = 0.699


 57%|█████▋    | 300/529 [02:46<02:07,  1.80it/s]

train step 300: train loss = 0.735


 76%|███████▌  | 400/529 [03:41<01:11,  1.81it/s]

train step 400: train loss = 0.692


 95%|█████████▍| 500/529 [04:37<00:16,  1.80it/s]

train step 500: train loss = 0.691


100%|██████████| 529/529 [04:53<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:23<00:00,  2.01it/s]


epoch 3: train ava accuracy = 0.841
epoch 3: train loss = 0.706
epoch 3: train ovo accuracy = 0.930
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.01it/s]


epoch 3: test ava accuracy = 0.824
epoch 3: test loss = 0.722
epoch 3: test ovo accuracy = 0.927


 19%|█▉        | 100/529 [00:55<03:58,  1.80it/s]

train step 100: train loss = 0.720


 38%|███▊      | 200/529 [01:51<03:02,  1.80it/s]

train step 200: train loss = 0.672


 57%|█████▋    | 300/529 [02:46<02:07,  1.80it/s]

train step 300: train loss = 0.704


 76%|███████▌  | 400/529 [03:42<01:11,  1.80it/s]

train step 400: train loss = 0.712


 95%|█████████▍| 500/529 [04:38<00:16,  1.79it/s]

train step 500: train loss = 0.713


100%|██████████| 529/529 [04:54<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:23<00:00,  2.01it/s]


epoch 4: train ava accuracy = 0.845
epoch 4: train loss = 0.702
epoch 4: train ovo accuracy = 0.924
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.02it/s]


epoch 4: test ava accuracy = 0.823
epoch 4: test loss = 0.725
epoch 4: test ovo accuracy = 0.921


 19%|█▉        | 100/529 [00:55<03:58,  1.80it/s]

train step 100: train loss = 0.694


 38%|███▊      | 200/529 [01:51<03:03,  1.79it/s]

train step 200: train loss = 0.752


 57%|█████▋    | 300/529 [02:47<02:07,  1.80it/s]

train step 300: train loss = 0.687


 76%|███████▌  | 400/529 [03:42<01:11,  1.80it/s]

train step 400: train loss = 0.761


 95%|█████████▍| 500/529 [04:38<00:16,  1.81it/s]

train step 500: train loss = 0.692


100%|██████████| 529/529 [04:54<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:22<00:00,  2.01it/s]


epoch 5: train ava accuracy = 0.857
epoch 5: train loss = 0.691
epoch 5: train ovo accuracy = 0.932
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.02it/s]


epoch 5: test ava accuracy = 0.833
epoch 5: test loss = 0.714
epoch 5: test ovo accuracy = 0.926


 19%|█▉        | 100/529 [00:55<03:59,  1.79it/s]

train step 100: train loss = 0.686


 38%|███▊      | 200/529 [01:51<03:02,  1.80it/s]

train step 200: train loss = 0.696


 57%|█████▋    | 300/529 [02:46<02:06,  1.81it/s]

train step 300: train loss = 0.730


 76%|███████▌  | 400/529 [03:42<01:11,  1.81it/s]

train step 400: train loss = 0.721


 95%|█████████▍| 500/529 [04:37<00:16,  1.81it/s]

train step 500: train loss = 0.736


100%|██████████| 529/529 [04:53<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:22<00:00,  2.01it/s]


epoch 6: train ava accuracy = 0.854
epoch 6: train loss = 0.694
epoch 6: train ovo accuracy = 0.931
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.02it/s]


epoch 6: test ava accuracy = 0.821
epoch 6: test loss = 0.727
epoch 6: test ovo accuracy = 0.925


 19%|█▉        | 100/529 [00:55<03:59,  1.79it/s]

train step 100: train loss = 0.656


 38%|███▊      | 200/529 [01:51<03:03,  1.79it/s]

train step 200: train loss = 0.705


 57%|█████▋    | 300/529 [02:47<02:07,  1.80it/s]

train step 300: train loss = 0.734


 76%|███████▌  | 400/529 [03:42<01:11,  1.80it/s]

train step 400: train loss = 0.649


 95%|█████████▍| 500/529 [04:38<00:16,  1.79it/s]

train step 500: train loss = 0.724


100%|██████████| 529/529 [04:54<00:00,  1.80it/s]


train evaluation


100%|██████████| 529/529 [04:22<00:00,  2.01it/s]


epoch 7: train ava accuracy = 0.850
epoch 7: train loss = 0.697
epoch 7: train ovo accuracy = 0.932
test evaluation


100%|██████████| 166/166 [01:22<00:00,  2.01it/s]

epoch 7: test ava accuracy = 0.821
epoch 7: test loss = 0.725
epoch 7: test ovo accuracy = 0.929





In [16]:
def precision_macro(conf_matrix):
  #macro = averaged across precisions for each class
  n_classes = conf_matrix.shape[0]
  by_class = []
  for i in range(n_classes):
    val = conf_matrix[i, i]/conf_matrix[:, i].sum()
    by_class.append(val)
  return by_class, sum(by_class)/n_classes # class-wise precision and macro

def recall_macro(conf_matrix):
  #macro = averaged across recalls for each class
  n_classes = conf_matrix.shape[0]
  by_class = []
  for i in range(n_classes):
    val = conf_matrix[i, i]/conf_matrix[i, :].sum()
    by_class.append(val)
  return by_class, sum(by_class)/n_classes # class-wise recall and macro

def precision_ovo(conf_matrix, one_label=0): #one_label -- the label of the class which is opposed to other ones
  sub_matrix = conf_matrix[:, one_label]
  denum = conf_matrix.sum() - sub_matrix.sum()
  num = conf_matrix.sum() - sub_matrix.sum() - conf_matrix[one_label, :].sum() + conf_matrix[one_label, one_label]
  return num/denum
def recall_ovo(conf_matrix, one_label=0):
  return conf_matrix[one_label, one_label]/conf_matrix[one_label, :].sum()


In [17]:
def to_python_types(results):
  output = {}
  output['training_loss_history'] = results['training_loss_history']
  for x in ['train_epochs_res', 'test_epochs_res', 'real_test_res']:
    if x in [y for y in results.keys()]:
        output[x] = []
        for hist_log in results[x]:
          res = {}
          res['accuracy_ava'] = hist_log['accuracy_ava'].item()
          if 'loss' in [y for y in hist_log.keys()]:
            res['loss'] = hist_log['loss']
          res['accuracy_ovo'] = hist_log['accuracy_ovo'].item()
          if 'conf_matrix' in [y for y in hist_log.keys()]:
            res['conf_matrix'] = hist_log['conf_matrix'].tolist()
          output[x].append(res)

  return output

In [18]:
test_dataset = AnswersDataset(tokenizer, train_test_data, 'test', max_len=max_seq_length)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                        pin_memory=True, num_workers=2)

100%|██████████| 18019/18019 [00:54<00:00, 328.60it/s]


In [19]:
def evaluation_epoch_test(model, loader):
  n_correct, n_total = 0, 0
  n_correct_oo = 0
  total_loss = 0

  total_conf_matrix = torch.zeros(size=(3, 3))

  with torch.no_grad():
    for chunk in tqdm(loader):
      qx, ax, y, qmask, amask = chunk
      pred = model(qx.to(device), ax.to(device), qmask.to(device), amask.to(device)).cpu()
      apred = torch.argmax(pred, 1) # for all-vs-all classification

      oopred = torch.where(apred > 0, 1, 0) # for one-vs-others classification
      ooy = torch.where(y > 0, 1, 0)

      n_correct += (apred == y).sum()
      n_correct_oo += (oopred == ooy).sum()

      n_total += y.shape[0]

      #total_loss += y.shape[0] * loss_obj(pred, y).item()
      total_conf_matrix+=conf_matrix(apred, y, num_classes=3)

  return {'accuracy_ava': n_correct/n_total, 'loss': -1,
          'accuracy_ovo':n_correct_oo/n_total, 'conf_matrix': total_conf_matrix}

In [20]:
test_results = evaluation_epoch_test(model1, test_loader)

100%|██████████| 141/141 [01:10<00:00,  2.00it/s]


In [21]:
mtx = test_results['conf_matrix']
precision_ovo(mtx).item(), recall_ovo(mtx).item()

(0.9508571624755859, 0.8940513730049133)

In [22]:
mtx

tensor([[5080.,  162.,  440.],
        [ 405., 4848., 1408.],
        [ 282.,  428., 4964.]])

In [23]:
precision_macro(mtx), recall_macro(mtx)

(([tensor(0.8809), tensor(0.8915), tensor(0.7287)], tensor(0.8337)),
 ([tensor(0.8941), tensor(0.7278), tensor(0.8749)], tensor(0.8322)))

In [24]:
train_hist['real_test_res'] = [test_results]

In [None]:
train_hist['real_test_res']

In [None]:
to_python_types(train_hist)

In [25]:
import json
with open('distilbert_train_val_res_v10.json', 'w') as f:
    json.dump(to_python_types(train_hist), f)