In [2]:
!pip install torcheval

Collecting torcheval
  Downloading torcheval-0.0.7-py3-none-any.whl.metadata (8.6 kB)
Downloading torcheval-0.0.7-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.2/179.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torcheval
Successfully installed torcheval-0.0.7


In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from transformers import T5Tokenizer, T5Model
#from datasets import Dataset

from torch.utils.data import Dataset, DataLoader

from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm
import pandas as pd

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
torch.random.manual_seed(0)
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5Model.from_pretrained("t5-base")

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

In [11]:
model

T5Model(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dropout(p=0.1, inplace

In [5]:
import os
import random
import numpy as np


def enable_determinism():
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
    torch.use_deterministic_algorithms(True)

def fix_seeds(seed: int):
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.mps.manual_seed(seed)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

enable_determinism()
fix_seeds(0)

In [11]:
train_test_data = pd.read_csv('/kaggle/input/human-vs-qwen25-n-phi3/train_test_data.csv')

In [5]:
# Класс датасета
class AnswersDataset(Dataset):
  def __init__(self, tokenizer, data_df, sampletype,  max_len=512):
    self.raw_data = data_df[data_df['sample_type']==sampletype]

    self.max_len = max_len
    self.tokenizer = tokenizer
    self.inputs_q = []
    self.inputs_a = []
    self.targets = []

    self.class_mapper = {'Human': 0, 'Phi3-mini': 1, 'Qwen25': 2}

    self.class_mapper_inv = {v: k for k, v in self.class_mapper.items()}

    self._build()


  def __len__(self):
    return len(self.inputs_a)

  def __getitem__(self, index):
    question_ids = self.inputs_q[index]["input_ids"].squeeze()
    answers_ids = self.inputs_a[index]["input_ids"].squeeze()
    target_ids = self.targets[index]

    #src_mask    = self.inputs[index]["attention_mask"].squeeze()  # might need to squeeze
    #target_mask = self.targets[index]["attention_mask"].squeeze()  # might need to squeeze

    return  question_ids, answers_ids, target_ids
    #{"question_ids": question_ids, "answers_ids": answers_ids, "target_ids": target_ids}

  def _build(self):
    self._buil_examples_from_files()

  def _buil_examples_from_files(self):
    # REPLACE_NO_SPACE = re.compile("[.;:!\'?,\"()]")
    # REPLACE_WITH_SPACE = re.compile("()|(\-)|(\/)")

    for i, row in tqdm(self.raw_data.iterrows(), total=self.raw_data.shape[0]):

      if pd.isna(row['Answers']):
        continue

      text_question = row['Question']
      text_answer = row['Answers']

      line_question = text_question.strip()
      line_answer = text_answer.strip()


      # line = REPLACE_NO_SPACE.sub("", line)
      # line = REPLACE_WITH_SPACE.sub("", line)
      # line = line + ' '

      target = self.class_mapper[row['Author']]

       # tokenize inputs
      tokenized_questions = self.tokenizer.batch_encode_plus(
          [line_question], max_length=self.max_len, padding='max_length', return_tensors="pt",
          truncation=True
      )

      tokenized_answers = self.tokenizer.batch_encode_plus(
          [line_answer], max_length=self.max_len, padding='max_length', return_tensors="pt",
          truncation=True
      )

       # tokenize targets


      self.inputs_q.append(tokenized_questions)
      self.inputs_a.append(tokenized_answers)

      self.targets.append(target)

In [6]:
max_seq_length = 128 # with 256 one epoch with 2 evaluations take 2 hours together
train_dataset = AnswersDataset(tokenizer, train_test_data, 'train', max_len=max_seq_length)
#test_dataset = AnswersDataset(tokenizer, train_test_data, 'test', max_len=max_seq_length)
val_dataset = AnswersDataset(tokenizer, train_test_data, 'val', max_len=max_seq_length)

100%|██████████| 67699/67699 [01:21<00:00, 832.31it/s] 
100%|██████████| 21233/21233 [00:26<00:00, 808.38it/s]


In [14]:
g = torch.Generator()
batch_size=128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=g,
                          pin_memory=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        pin_memory=True, num_workers=2)

In [6]:
from tqdm import tqdm
import torch.nn as nn
class T5Clf_FCtuned(nn.Module):
    def __init__(self, max_seq_length=512):
        super(T5Clf_FCtuned, self).__init__()
        self.t5model = model
        self.decoder_out_features =  self.t5model.decoder.block[11].layer[2].DenseReluDense.wo.out_features
        self.length=max_seq_length
        self.fc = nn.Linear(in_features=self.length*self.decoder_out_features, out_features = 3)
        self.activation = nn.Softmax()
        self.freeze_layers()


    def freeze_layers(self):
      for param in self.t5model.parameters():
        param.requires_grad = False



    def forward(self, qx, ax):
        out = self.t5model(
            input_ids=qx,
            decoder_input_ids=ax).last_hidden_state
        out = out.view(out.shape[0], -1)
        pred = self.fc(out)

        return self.activation(pred)

In [12]:
def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp
model1 = T5Clf_FCtuned(max_seq_length)
get_n_params(model1.t5model.shared)+get_n_params(model1.t5model.encoder)\
+get_n_params(model1.t5model.decoder)+get_n_params(model1.fc)

272547075

In [20]:
from torcheval.metrics.functional import multiclass_confusion_matrix as conf_matrix

def evaluation_epoch(model, loader, loss_obj):
  n_correct, n_total = 0, 0
  n_correct_oo = 0
  total_loss = 0

  total_conf_matrix = torch.zeros(size=(3, 3))

  with torch.no_grad():
    for chunk in tqdm(loader):
      qx, ax, y = chunk
      pred = model(qx.to(device), ax.to(device)).cpu()
      apred = torch.argmax(pred, 1) # for all-vs-all classification

      oopred = torch.where(apred > 0, 1, 0) # for one-vs-others classification
      ooy = torch.where(y > 0, 1, 0)

      n_correct += (apred == y).sum()
      n_correct_oo += (oopred == ooy).sum()

      n_total += y.shape[0]

      total_loss += y.shape[0] * loss_obj(pred, y).item()
      total_conf_matrix+=conf_matrix(apred, y, num_classes=3)

  return {'accuracy_ava': n_correct/n_total, 'loss': total_loss/n_total,
          'accuracy_ovo':n_correct_oo/n_total, 'conf_matrix': total_conf_matrix}

def train_neural_net(model, train_loader, test_loader, n_epochs=1):
  loss = nn.CrossEntropyLoss()
  optimizer=torch.optim.Adam(model.fc.parameters(), lr=1e-4)
  loss_train_history = [] # логируется всегда

  # логируются каждую эпоху

  train_epoch_evals = []
  test_epoch_evals = []

  for _ in range(n_epochs):
    i=0
    model.train().to(device)
    for train_chunk in tqdm(train_loader):
        qx, ax, y = train_chunk
        optimizer.zero_grad(set_to_none=True)


        pred = model(qx.to(device), ax.to(device))
        loss_val = loss(pred, y.to(device)) #.long()
        loss_val.backward()

        loss_val_item = loss_val.detach().cpu().item()

        optimizer.step()
        loss_train_history.append(loss_val_item)
        i+=1
        if i % 100 == 0:
          print(f'train step {i}: train loss = {loss_val_item :.3f}')
        

    model.eval()
    #model.cpu()

    #train
    print('train evaluation')
    train_eval = evaluation_epoch(model, train_loader, loss)
    print(f"epoch {_}: train ava accuracy = {train_eval['accuracy_ava'] :.3f}")
    print(f"epoch {_}: train loss = {train_eval['loss'] :.3f}")
    print(f"epoch {_}: train ovo accuracy = {train_eval['accuracy_ovo'] :.3f}")
    train_epoch_evals.append(train_eval)

    #test
    print('test evaluation')
    test_eval = evaluation_epoch(model, test_loader, loss)
    print(f"epoch {_}: test ava accuracy = {test_eval['accuracy_ava'] :.3f}")
    print(f"epoch {_}: test loss = {test_eval['loss'] :.3f}")
    print(f"epoch {_}: test ovo accuracy = {test_eval['accuracy_ovo'] :.3f}")
    test_epoch_evals.append(test_eval)

  return {'training_loss_history': loss_train_history,
          'train_epochs_res': train_epoch_evals,
          'test_epochs_res': test_epoch_evals
          }

In [21]:
enable_determinism()
fix_seeds(0)
model1 = T5Clf_FCtuned(max_seq_length)
train_hist = train_neural_net(model1, train_loader, val_loader, n_epochs=8)

 19%|█▉        | 100/529 [04:10<17:57,  2.51s/it]

train step 100: train loss = 0.951


 38%|███▊      | 200/529 [08:20<13:40,  2.49s/it]

train step 200: train loss = 0.909


 57%|█████▋    | 300/529 [12:30<09:33,  2.50s/it]

train step 300: train loss = 0.904


 76%|███████▌  | 400/529 [16:41<05:24,  2.51s/it]

train step 400: train loss = 0.913


 95%|█████████▍| 500/529 [20:52<01:12,  2.50s/it]

train step 500: train loss = 0.829


100%|██████████| 529/529 [22:04<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:32<00:00,  2.33s/it]


epoch 0: train ava accuracy = 0.678
epoch 0: train loss = 0.868
epoch 0: train ovo accuracy = 0.824
test evaluation


100%|██████████| 166/166 [06:25<00:00,  2.32s/it]


epoch 0: test ava accuracy = 0.645
epoch 0: test loss = 0.894
epoch 0: test ovo accuracy = 0.824


 19%|█▉        | 100/529 [04:10<17:52,  2.50s/it]

train step 100: train loss = 0.818


 38%|███▊      | 200/529 [08:20<13:43,  2.50s/it]

train step 200: train loss = 0.902


 57%|█████▋    | 300/529 [12:30<09:32,  2.50s/it]

train step 300: train loss = 0.859


 76%|███████▌  | 400/529 [16:40<05:23,  2.51s/it]

train step 400: train loss = 0.866


 95%|█████████▍| 500/529 [20:50<01:12,  2.50s/it]

train step 500: train loss = 0.840


100%|██████████| 529/529 [22:03<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:31<00:00,  2.33s/it]


epoch 1: train ava accuracy = 0.728
epoch 1: train loss = 0.828
epoch 1: train ovo accuracy = 0.843
test evaluation


100%|██████████| 166/166 [06:26<00:00,  2.33s/it]


epoch 1: test ava accuracy = 0.708
epoch 1: test loss = 0.845
epoch 1: test ovo accuracy = 0.841


 19%|█▉        | 100/529 [04:09<17:56,  2.51s/it]

train step 100: train loss = 0.893


 38%|███▊      | 200/529 [08:18<13:39,  2.49s/it]

train step 200: train loss = 0.846


 57%|█████▋    | 300/529 [12:29<09:32,  2.50s/it]

train step 300: train loss = 0.854


 76%|███████▌  | 400/529 [16:39<05:21,  2.49s/it]

train step 400: train loss = 0.782


 95%|█████████▍| 500/529 [20:49<01:12,  2.50s/it]

train step 500: train loss = 0.883


100%|██████████| 529/529 [22:01<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:28<00:00,  2.32s/it]


epoch 2: train ava accuracy = 0.740
epoch 2: train loss = 0.810
epoch 2: train ovo accuracy = 0.847
test evaluation


100%|██████████| 166/166 [06:25<00:00,  2.32s/it]


epoch 2: test ava accuracy = 0.725
epoch 2: test loss = 0.824
epoch 2: test ovo accuracy = 0.843


 19%|█▉        | 100/529 [04:08<17:45,  2.48s/it]

train step 100: train loss = 0.844


 38%|███▊      | 200/529 [08:16<13:35,  2.48s/it]

train step 200: train loss = 0.800


 57%|█████▋    | 300/529 [12:25<09:32,  2.50s/it]

train step 300: train loss = 0.847


 76%|███████▌  | 400/529 [16:36<05:22,  2.50s/it]

train step 400: train loss = 0.857


 95%|█████████▍| 500/529 [20:45<01:12,  2.48s/it]

train step 500: train loss = 0.858


100%|██████████| 529/529 [21:56<00:00,  2.49s/it]


train evaluation


100%|██████████| 529/529 [20:27<00:00,  2.32s/it]


epoch 3: train ava accuracy = 0.750
epoch 3: train loss = 0.801
epoch 3: train ovo accuracy = 0.854
test evaluation


100%|██████████| 166/166 [06:26<00:00,  2.33s/it]


epoch 3: test ava accuracy = 0.719
epoch 3: test loss = 0.828
epoch 3: test ovo accuracy = 0.847


 19%|█▉        | 100/529 [04:08<17:55,  2.51s/it]

train step 100: train loss = 0.860


 38%|███▊      | 200/529 [08:20<13:48,  2.52s/it]

train step 200: train loss = 0.853


 57%|█████▋    | 300/529 [12:30<09:29,  2.49s/it]

train step 300: train loss = 0.790


 76%|███████▌  | 400/529 [16:38<05:20,  2.48s/it]

train step 400: train loss = 0.804


 95%|█████████▍| 500/529 [20:46<01:12,  2.49s/it]

train step 500: train loss = 0.819


100%|██████████| 529/529 [21:58<00:00,  2.49s/it]


train evaluation


100%|██████████| 529/529 [20:22<00:00,  2.31s/it]


epoch 4: train ava accuracy = 0.759
epoch 4: train loss = 0.794
epoch 4: train ovo accuracy = 0.862
test evaluation


100%|██████████| 166/166 [06:26<00:00,  2.33s/it]


epoch 4: test ava accuracy = 0.732
epoch 4: test loss = 0.817
epoch 4: test ovo accuracy = 0.858


 19%|█▉        | 100/529 [04:08<17:45,  2.48s/it]

train step 100: train loss = 0.811


 38%|███▊      | 200/529 [08:18<13:42,  2.50s/it]

train step 200: train loss = 0.808


 57%|█████▋    | 300/529 [12:28<09:32,  2.50s/it]

train step 300: train loss = 0.834


 76%|███████▌  | 400/529 [16:38<05:22,  2.50s/it]

train step 400: train loss = 0.782


 95%|█████████▍| 500/529 [20:47<01:12,  2.50s/it]

train step 500: train loss = 0.844


100%|██████████| 529/529 [22:00<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:33<00:00,  2.33s/it]


epoch 5: train ava accuracy = 0.730
epoch 5: train loss = 0.816
epoch 5: train ovo accuracy = 0.861
test evaluation


100%|██████████| 166/166 [06:26<00:00,  2.33s/it]


epoch 5: test ava accuracy = 0.689
epoch 5: test loss = 0.853
epoch 5: test ovo accuracy = 0.861


 19%|█▉        | 100/529 [04:09<17:52,  2.50s/it]

train step 100: train loss = 0.835


 38%|███▊      | 200/529 [08:19<13:41,  2.50s/it]

train step 200: train loss = 0.784


 57%|█████▋    | 300/529 [12:29<09:34,  2.51s/it]

train step 300: train loss = 0.813


 76%|███████▌  | 400/529 [16:39<05:20,  2.49s/it]

train step 400: train loss = 0.795


 95%|█████████▍| 500/529 [20:48<01:12,  2.49s/it]

train step 500: train loss = 0.821


100%|██████████| 529/529 [22:00<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:26<00:00,  2.32s/it]


epoch 6: train ava accuracy = 0.747
epoch 6: train loss = 0.802
epoch 6: train ovo accuracy = 0.859
test evaluation


100%|██████████| 166/166 [06:22<00:00,  2.31s/it]


epoch 6: test ava accuracy = 0.709
epoch 6: test loss = 0.835
epoch 6: test ovo accuracy = 0.858


 19%|█▉        | 100/529 [04:09<17:53,  2.50s/it]

train step 100: train loss = 0.870


 38%|███▊      | 200/529 [08:20<13:43,  2.50s/it]

train step 200: train loss = 0.791


 57%|█████▋    | 300/529 [12:30<09:35,  2.51s/it]

train step 300: train loss = 0.827


 76%|███████▌  | 400/529 [16:41<05:22,  2.50s/it]

train step 400: train loss = 0.778


 95%|█████████▍| 500/529 [20:51<01:12,  2.50s/it]

train step 500: train loss = 0.803


100%|██████████| 529/529 [22:04<00:00,  2.50s/it]


train evaluation


100%|██████████| 529/529 [20:34<00:00,  2.33s/it]


epoch 7: train ava accuracy = 0.749
epoch 7: train loss = 0.799
epoch 7: train ovo accuracy = 0.872
test evaluation


100%|██████████| 166/166 [06:26<00:00,  2.33s/it]

epoch 7: test ava accuracy = 0.702
epoch 7: test loss = 0.840
epoch 7: test ovo accuracy = 0.865





In [None]:
train_hist

In [23]:
test_dataset = AnswersDataset(tokenizer, train_test_data, 'test', max_len=max_seq_length)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                        pin_memory=True, num_workers=2)

100%|██████████| 18019/18019 [00:21<00:00, 848.48it/s]


In [24]:
!pip install torcheval

  pid, fd = os.forkpty()




In [25]:
from torcheval.metrics.functional import multiclass_confusion_matrix as conf_matrix


In [26]:
def evaluation_epoch_test(model, loader):
  n_correct, n_total = 0, 0
  n_correct_oo = 0
  total_loss = 0

  total_conf_matrix = torch.zeros(size=(3, 3))

  with torch.no_grad():
    for chunk in tqdm(loader):
      qx, ax, y =chunk
      pred = model(qx.to(device), ax.to(device)).cpu()
      apred = torch.argmax(pred, 1) # for all-vs-all classification

      oopred = torch.where(apred > 0, 1, 0) # for one-vs-others classification
      ooy = torch.where(y > 0, 1, 0)

      n_correct += (apred == y).sum()
      n_correct_oo += (oopred == ooy).sum()

      n_total += y.shape[0]

      #total_loss += y.shape[0] * loss_obj(pred, y).item()
      total_conf_matrix+=conf_matrix(apred, y, num_classes=3)

  return {'accuracy_ava': n_correct/n_total, 'loss': -1,
          'accuracy_ovo':n_correct_oo/n_total, 'conf_matrix': total_conf_matrix}

In [29]:
test_results = evaluation_epoch_test(model1, test_loader)

100%|██████████| 141/141 [05:28<00:00,  2.33s/it]


In [32]:
mtx = test_results['conf_matrix']
precision_ovo(mtx).item(), recall_ovo(mtx).item()

(0.9325045943260193, 0.8643083572387695)

In [33]:
precision_macro(mtx), recall_macro(mtx)

(([tensor(0.7448), tensor(0.8638), tensor(0.6039)], tensor(0.7375)),
 ([tensor(0.8643), tensor(0.5017), tensor(0.8040)], tensor(0.7234)))

In [34]:
mtx

tensor([[4911.,  186.,  585.],
        [ 912., 3342., 2407.],
        [ 771.,  341., 4562.]])

In [30]:
def precision_macro(conf_matrix):
  #macro = averaged across precisions for each class
  n_classes = conf_matrix.shape[0]
  by_class = []
  for i in range(n_classes):
    val = conf_matrix[i, i]/conf_matrix[:, i].sum()
    by_class.append(val)
  return by_class, sum(by_class)/n_classes # class-wise precision and macro

def recall_macro(conf_matrix):
  #macro = averaged across recalls for each class
  n_classes = conf_matrix.shape[0]
  by_class = []
  for i in range(n_classes):
    val = conf_matrix[i, i]/conf_matrix[i, :].sum()
    by_class.append(val)
  return by_class, sum(by_class)/n_classes # class-wise recall and macro

def precision_ovo(conf_matrix, one_label=0): #one_label -- the label of the class which is opposed to other ones
  sub_matrix = conf_matrix[:, one_label]
  denum = conf_matrix.sum() - sub_matrix.sum()
  num = conf_matrix.sum() - sub_matrix.sum() - conf_matrix[one_label, :].sum() + conf_matrix[one_label, one_label]
  return num/denum
def recall_ovo(conf_matrix, one_label=0):
  return conf_matrix[one_label, one_label]/conf_matrix[one_label, :].sum()


In [31]:
def to_python_types(results):
  output = {}
  output['training_loss_history'] = results['training_loss_history']
  for x in ['train_epochs_res', 'test_epochs_res', 'real_test_res']:
    if x in [y for y in results.keys()]:
        output[x] = []
        for hist_log in results[x]:
          res = {}
          res['accuracy_ava'] = hist_log['accuracy_ava'].item()
          if 'loss' in [y for y in hist_log.keys()]:
            res['loss'] = hist_log['loss']
          res['accuracy_ovo'] = hist_log['accuracy_ovo'].item()
          if 'conf_matrix' in [y for y in hist_log.keys()]:
            res['conf_matrix'] = hist_log['conf_matrix'].tolist()
          output[x].append(res)

  return output

In [28]:
[y for y in train_hist.keys()]

['training_loss_history', 'train_epochs_res', 'test_epochs_res']

In [35]:
train_hist['real_test_res'] = [test_results]

In [36]:
#print(to_python_types(train_hist))
import json
with open('t5_train_val_res.json', 'w') as f:
    json.dump(to_python_types(train_hist), f)

In [37]:
test_results = evaluation_epoch_test(model1, test_loader)

100%|██████████| 141/141 [04:58<00:00,  2.12s/it]


In [38]:
test_results

{'accuracy_ava': tensor(0.7228),
 'loss': -1,
 'accuracy_ovo': tensor(0.8760),
 'conf_matrix': tensor([[3821.,  513., 1348.],
         [ 212., 4265., 2184.],
         [ 161.,  577., 4936.]])}

In [39]:
train_hist['real_test_res'] = [test_results]

In [44]:
mtx = test_results['conf_matrix']
precision_ovo(mtx).item(), recall_ovo(mtx).item()

(0.8653693199157715, 0.6724745035171509)

In [45]:
precision_macro(mtx), recall_macro(mtx)

(([tensor(0.9111), tensor(0.7965), tensor(0.5829)], tensor(0.7635)),
 ([tensor(0.6725), tensor(0.6403), tensor(0.8699)], tensor(0.7276)))

In [40]:
with open('t5_train_val_test_res.json', 'w') as f:
    json.dump(to_python_types(train_hist), f)

In [41]:
#only train and val results
check_read = None
with open('/kaggle/working/t5_train_val_test_res.json') as f:
    check_read = json.load(f)

In [42]:
check_read['real_test_res']

[{'accuracy_ava': 0.7227618098258972,
  'loss': -1,
  'accuracy_ovo': 0.876006007194519,
  'conf_matrix': [[3821.0, 513.0, 1348.0],
   [212.0, 4265.0, 2184.0],
   [161.0, 577.0, 4936.0]]}]