In [1]:
import transformers, tokenizers, torch, datasets
from datasets import load_dataset
import os
import glob
import pickle
import pandas as pd

In [2]:
print('transformers={}'.format(transformers.__version__))
print('tokenizers={}'.format(tokenizers.__version__))
print('torch={}'.format(torch.__version__))
print('datasets={}'.format(datasets.__version__))

transformers=4.31.0
tokenizers=0.11.0
torch=1.13.1
datasets=2.10.1


In [3]:
current_directory = os.getcwd()

In [4]:
def defect_percent(df):
    return df['label'].sum()/df.shape[0]

In [None]:
datatest_versions = ['base','full_change','special_tokens']
models_names = ['bilstm','codebert','codereviewer','javabert','codet5p']
fine_tuning_techniques = ['full','partial','lora']

In [None]:
include_message=True
include_metrics=False
use_lora = False
ft_technique = fine_tuning_techniques[1]
lsg_attention = False
copy_embedding_from_model=True
print_for_debug = False
max_commit_code_length = 512
partial_trained_encoders = 3
model_name = models_names[1]
dataset_version_name = datatest_versions[0]

In [None]:
create_lsg_model = False

### load dataset

In [5]:
from datasets import load_dataset
multiple_files = True
data_name = 'apache_jit'
prefix = '<java> '
columns_to_remove = ['id','msg','code','metrics']

'base'

In [6]:
train_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/train_v{version}_shuffled.csv', streaming=True,split="train")
valid_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/valid_v{version}_balanced.csv',split="train")
test_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/test_v{version}.csv',split="train")
train_df = pd.read_csv(f'datasets/{data_name}/train_v{version}_shuffled.csv')
valid_df = pd.read_csv(f'datasets/{data_name}/valid_v{version}_balanced.csv')
test_df = pd.read_csv(f'datasets/{data_name}/test_v{version}.csv')
train_length = train_df.shape[0]
valid_length = valid_df.shape[0]
test_length  = test_df.shape[0]

Found cached dataset csv (/home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-2b5baf67d918b518/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
Found cached dataset csv (/home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-5c559d330a4271fa/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


In [7]:
valid_df.head()

Unnamed: 0,id,label,msg,code
0,c548a3d335dc4fcc2c1012a5d3f0f956feff7276,0,[FLINK-11549][tests] Remove obsolete ResourceM...,[['<del> package org.apache.flink.runtime.clus...
1,a02e8e0f9fde8372ff0eea0e674f72bb8be15875,1,ZEPPELIN-3876. Unable to rename note\n\n### Wh...,"[['<del> if (isRelative) {', '<add> if (isRela..."
2,98184bd078d7b957f4cf99d26a5aacef1583fe3a,0,[FLINK-12401][table] Support incremental emit ...,[['<add> import org.apache.flink.util.Collecto...
3,045b8da502328e72976d9e3aeb79a50090596bda,1,HIVE-16642 : New Events created as part of rep...,[['<add> package org.apache.hive.hcatalog.api....
4,2f1b3eab675ac327a6f61b724d5f0bce01ec6e68,1,HBASE-19998 Flakey TestVisibilityLabelsWithDef...,"[['<add> LOG.info(""REMOVE"", new Throwable(""REM..."


In [8]:
if model_name == 'bilstm'
    model_checkpoint = 'microsoft/codereviewer'
elif model_name == 'codebert':
    model_checkpoint = 'microsoft/codebert-base'
elif model_name == 'javabert':
    model_checkpoint = 'CAUKiel/JavaBERT'
elif model_name == 'codereviewer':
    model_checkpoint = 'microsoft/codereviewer'
else:
    model_checkpoint = 'Salesforce/codet5p-220m'
if lsg_attention:
    if os.path.exists("{}_lsg_{}".format(model_name,max_commit_code_length)):
        model_checkpoint = "{}_lsg_{}".format(model_name,max_commit_code_length)
    else:
        create_lsg_model = True
model_name_suffix = model_name + '_{}{}{}{}_{}'.format(max_commit_code_length,'_msg' if include_message else '','_mtc' if include_metrics else '', '_lsg' if lsg_attention else '', '_lora' if use_lora else '',dataset_version_name) 

### Tokenize the train and test data

In [9]:
if model_name == 'javabert':
    cls_token = '[CLS]'
    sep_token = '[SEP]'
    msg_token = '<msg>'
    metrics_token = '[CLS]'
    code_change_token = '[CLS]'
else:
    cls_token = '<s>'
    sep_token = '</s>'
    msg_token = '<msg>'
    metrics_token = '<s>''
    code_change_token = '<s>'
if dataset_version_name == 'special_tokens':
    added_token = '<added>'
    removed_token = '<removed>'
else:
    added_token = '<add>'
    removed_token = '<del>'
prefix = ''

In [None]:
def add_tokens_to_tokenizer(tokenizer):
    if dataset_version_name == 'special_tokens':
        tokenizer.add_special_tokens({'additional_special_tokens':[added_token, removed_token,'<STR>','<NUM>']})
    if include_message:
        tokenizer.add_special_tokens({'additional_special_tokens':[cls_token, sep_token,'<pad>', '<unk>',added_token, removed_token,msg_token]})
    else:
        tokenizer.add_special_tokens({'additional_special_tokens':[cls_token, sep_token,'<pad>', '<unk>',added_token, removed_token]})

In [None]:
if create_lsg_model:
    from lsg_converter import LSGConverter
    from transformers import AutoModelForSequenceClassification
    converter = LSGConverter(max_sequence_length=max_commit_code_length)
    if model_name == 'javabert':
        architecture = 'RobertaForSequenceClassification'
    elif model_name == 'codebert':
        architecture = 'RobertaForSequenceClassification'
    else:
        print('Error! LSG Attention not supported for T5 models at the moment. (CodeReviewer and CodeT5+)')
        exit()
    model, tokenizer = converter.convert_from_pretrained(model_checkpoint,dropout=0.2,hidden_dropout_prob=0.2,num_labels=2,architecture=architecture)
    add_tokens_to_tokenizer(tokenizer)
    model.resize_token_embeddings(len(tokenizer))
    save_path = '{}_lsg_{}'.format(model_name,max_commit_code_length)
    model_checkpoint = save_path
    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)

In [10]:
from transformers import AutoTokenizer
from bi_lstm import BiLSTM
if model_name == 'bilstm':
    if copy_embedding_from_model:
        tokenizer = AutoTokenizer.from_pretrained( model_checkpoint)
    else:
        from transformers import RobertaTokenizerFast
        tokenizer_name = '{}_{}_bpe'.format(data_name,vocab_size)
        tokenizer = RobertaTokenizerFast.from_pretrained('./BPE_tokenizer/{}'.format(tokenizer_name),max_len=max_commit_code_length)
else:
    tokenizer = AutoTokenizer.from_pretrained( model_checkpoint)
add_tokens_to_tokenizer(tokenizer)

In [13]:
if print_for_debug:
    print(len(tokenizer))

{'input_ids': [0, 2, 2], 'attention_mask': [1, 1, 1]}

In [16]:
def remove_empty(seq):
    return list(filter(lambda s: s != None and s != '',seq))


def join_commit_codes_sep(commit,commit_start=' <NFILE> ',file_sep=' <NFILE> ',line_sep=' <NLINE> '):
    if type(commit) == str:
        commit = eval(commit)
    #return commit_start + file_sep.join(remove_empty([line_sep.join([correct_token(line.split(' ')[0]) +' ' + ' '.join(line.split(' ')[1:]) for line in file]) for file in commit]))
    return commit_start + file_sep.join(remove_empty([line_sep.join(file) for file in commit]))

def join_commit_codes(commit):
    return join_commit_codes_sep(commit,prefix ,f' {sep_token} ','\n')
    #return join_commit_codes_sep(commit,cls_token+' ',f' {sep_token} ',' ')
def join_file_lines(file):
    if type(file) != list:
        file = eval(file)
    return prefix +  '\n'.join(file)

def empty_join_commit_codes(commit):
    return join_commit_codes_sep(commit,'','')
def join_lines(lines,commit_start=' <NFILE> ',line_sep=' <NLINE> '):
    return commit_start + line_sep.join(lines)
def flatten(l):
    return [item for sublist in l for item in sublist]
def join_commit_msg_and_code(msg,code):
    if msg is None:
        msg = ''
    if code is None:
        code = ''
    return msg_token + ' ' + msg.split('\n')[0] + ' ' + code_change_token + ' ' + join_commit_codes_sep(code ,'',f' {sep_token} ','\n')
def join_commit_msg_metrics_code(msg,mtc,code):
    if msg is None:
        msg = ''
    if code is None:
        code = ''
    if mtc is None:
        mtc = ''
    return msg_token + ' ' + msg.split('\n')[0] + '\n' + metrics_token + ' ' + mtc + '\n' + code_change_token + ' ' + join_commit_codes_sep(code ,'',f' {sep_token} ','\n')

In [17]:
def encode(batch):
    if multiple_files:
        if include_message:
            if include_metrics:
                inputs = tokenizer(list(map(join_commit_msg_metrics_code,batch['msg'],batch['metrics'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
            else:
                inputs = tokenizer(list(map(join_commit_msg_and_code,batch['msg'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
        elif include_metrics:
            inputs = tokenizer(list(map(join_commit_msg_and_code,batch['metrics'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
        else:
            inputs = tokenizer(list(map(join_commit_codes,batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
    else:    
        inputs = tokenizer(list(map(join_file_lines,batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask
    return batch

In [18]:
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer,max_length=max_commit_code_length,padding='longest')

2023-09-25 15:18:46.435917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Model classes and initialization


In [19]:
device = 'cuda'

In [20]:
import torch
import torch.nn as nn
from transformers import AutoModel,AutoModelForSequenceClassification, AutoConfig
if model_name == 'bilstm':
    model = BiLSTM(len(tokenizer),embed_size=768,hidden_size=64,lstm_layers=4,dropout=0.2,padding_id=tokenizer.pad_token_id)
    if copy_embedding_from_model:
        copy_from_model = AutoModel.from_pretrained(model_checkpoint)
        with torch.no_grad():
            model.embedding.weight.copy_(copy_from_model.encoder.embed_tokens.weight)
            model.embedding.require_grad = False
else:
    config = AutoConfig.from_pretrained(model_checkpoint)
    config.hidden_dropout_prob = 0.2
    config.dropout = 0.2
    config.num_labels=2
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint,config=config)

    #change embedding layer size to match tokenizer vocabulary size (Because we added new tokens to the tokenizer):
    model.resize_token_embeddings(len(tokenizer))

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(50268, 768)

In [21]:
if print_for_debug:
    print(model)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50268, 768)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.2, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm

In [22]:
if ft_technique == 'lora':
    from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias="all"
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
elif ft_technique == 'partial':
    # freeze encoder layers except the last 2 layers
    modules = []
    trained_encoder_layers = partial_trained_encoders
    if model_name == 'codebert':
        modules = [model.roberta.embeddings, *model.roberta.encoder.layer[:-trained_encoder_layers]]
    elif model_name == 'javabert' 
        modules = [model.bert.embeddings, *model.bert.encoder.layer[:-trained_encoder_layers]]
    elif model_name == 'codereviewer' or model_name == 'codet5p':
        modules = [model.shared, *model.encoder.block[:-trained_encoder_layers]]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False

In [24]:
def param_count(model,trainable_only=True):
    return sum([p.numel()for p in model.parameters() if p.requires_grad or not trainable_only])
if print_for_debug:
    print(param_count(model))
    print(param_count(model,False))

14767874
124649474


In [25]:
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, matthews_corrcoef
def roc_auc(preds,target):
    roc_auc_score(target, preds)

def compute_metrics(p):
    _predictions = p.predictions
    _labels = p.label_ids
    _predictions = np.argmax(_predictions, axis=-1)
    vals = {}
    vals['accuracy'] = accuracy_score(_labels, _predictions)
    vals['f1'] = f1_score(_labels, _predictions)
    vals['precision'] = precision_score(_labels, _predictions)
    vals['recall'] = recall_score(_labels, _predictions)
    vals['matthews_correlation'] = matthews_corrcoef(_labels, _predictions)
    vals['auc'] = roc_auc_score(_labels, _predictions.reshape(-1,1))
    return  vals

In [None]:
from transformers.optimization import AdamW
from transformers import Trainer,get_linear_schedule_with_warmup
from math import ceil
init_lr,head_lr = 5e-4 ,1e-4
adam_eps = 1e-6
weight_decay = 0.01
epochs=10
batch_size = 8
gradient_accumulation_steps = 8
batch_steps = int(train_length/(batch_size*gradient_accumulation_steps))
#rem_steps = train_length%(batch_size*gradient_accumulation_steps)
rem_steps = ceil((train_length%(batch_size*gradient_accumulation_steps)) / batch_size)
train_steps = (epochs) * (batch_steps + rem_steps)
warmpup_factor = 0.25
warmpup_steps = int(train_steps*warmpup_factor)
optim = 'adafactor'

In [27]:
train_dataset = train_dataset.map(encode, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)
valid_dataset = valid_dataset.map(encode, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)

Loading cached processed dataset at /home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-2b5baf67d918b518/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-f32eac4c6c9bd6ad.arrow


In [28]:
if print_for_debug:
    for v in range(3):
        print(valid_dataset['input_ids'][v])
        print(valid_dataset['label'][v])
        #print(valid_dataset['attention_mask'][v])
        print(tokenizer.decode(valid_dataset['input_ids'][v]))

[0, 50267, 646, 7613, 23617, 12, 15314, 3414, 46386, 47173, 742, 27336, 29707, 13877, 44854, 2068, 38834, 1437, 0, 1437, 50266, 3737, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3998, 10504, 48019, 131, 50118, 50266, 6595, 18735, 2348, 4, 24625, 4, 40448, 36383, 131, 50118, 50266, 6595, 18735, 2348, 4, 21959, 23199, 4, 32379, 34603, 29233, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 43163, 27975, 4, 49602, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 677, 2348, 4, 33282, 2348, 41967, 5290, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3998, 10504, 48019, 4, 41817, 4, 47279, 2688, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3530, 39798, 4, 18522, 49054, 39868, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3530, 39798, 4, 13424, 1999, 4, 35804, 17452, 4, 42578, 42495, 24017, 39868, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 48768, 4, 40448, 37155,

[0, 50267, 646, 7613, 23617, 12, 22960, 2663, 46386, 14595, 742, 7737, 16548, 29901, 223, 5438, 27814, 15664, 5745, 13, 786, 12, 42996, 5230, 19920, 46498, 41101, 15, 9513, 21013, 4, 1437, 0, 1437, 50265, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 32843, 4, 44252, 368, 131, 50118, 50266, 1009, 1437, 1437, 1437, 1437, 28696, 3572, 15698, 991, 405, 33977, 49138, 3572, 15698, 50118, 50265, 1009, 1437, 1437, 1437, 1437, 28696, 3572, 15698, 991, 405, 33977, 50, 29901, 39962, 3908, 27814, 15664, 49138, 3572, 15698, 50118, 50265, 1009, 28696, 5234, 15698, 50118, 50265, 1009, 25522, 1039, 20414, 50118, 50265, 1009, 28277, 358, 86, 77, 41, 40796, 898, 197, 28, 1468, 1538, 4, 20, 1835, 923, 115, 50118, 50265, 1009, 28, 1169, 41, 419, 8, 20044, 898, 36, 28030, 3435, 37141, 25, 414, 5240, 43, 50, 5, 507, 50118, 50265, 1009, 898, 9, 5, 40796, 4, 50118, 50265, 1009, 50118, 50265, 1009, 22248, 31, 29901, 33977, 6, 29901, 39962, 3908, 27814, 15664, 16, 341, 7, 29901, 3266, 14, 33, 57, 4752, 4, 50118, 50

In [29]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir = './models/{}/{}'.format(data_name,model_name_suffix),
    num_train_epochs = epochs,
    per_device_train_batch_size = batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    per_device_eval_batch_size= batch_size,
    save_total_limit = 2,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model ='auc',
    evaluation_strategy = "epoch",
    eval_steps=4,
    disable_tqdm = False,
    warmup_steps=warmpup_steps,
    logging_steps = 4,
    remove_unused_columns=False,
    report_to="wandb",
    fp16 = False,
    logging_dir= './models/{}/{}/logs/'.format(data_name,model_name_suffix),
    dataloader_num_workers = 0,
    max_steps=train_steps)

In [30]:
from transformers.optimization import Adafactor

if optim == 'adamw':
    opt = torch.optim.AdamW(model.parameters(),lr=init_lr,betas=(0.9, 0.999), eps=adam_eps, weight_decay=weight_decay)
elif optim == 'adafactor':
    opt = Adafactor(model.parameters(), lr=init_lr, relative_step=False, warmup_init=False)
scheduling_types = ['warmpup_anneal', 'warmup','constant']
scheduling_type = scheduling_types[1]

if scheduling_type == scheduling_types[0]:
    lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=opt,
        max_lr=init_lr,
        pct_start=training_args.warmup_steps / training_args.max_steps,
        anneal_strategy="linear",
        total_steps=training_args.max_steps
    )
elif scheduling_type == scheduling_types[1]:
    lr_scheduler = transformers.get_constant_schedule_with_warmup(opt,training_args.warmup_steps)
else:
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(opt,lambda epoch: init_lr)

In [None]:
import wandb
# Login with your authentication key

wandb.login()
training_hyper_params = {
    "model_name": model_name_suffix,
    "optimizer": optim,
    "base_lr": init_lr,
    "weight_decay": weight_decay,
    "warmpup_factor":warmpup_factor,
    "warmpup_steps": warmpup_steps,
    "batch_size": batch_size,
    "gradient_accumulation_steps": gradient_accumulation_steps,
    "seq_len": max_commit_code_length,
    "epochs": epochs,
    "include_commit_msg":include_message,
    "trained_encoder_layers":trained_encoder_layers if not use_lora else -1,
    "dataset_version":version
}
wandb.init(project=data_name,name='{}/{}'.format(data_name,model_name_suffix),config=training_hyper_params)

In [32]:
#adam_opt = AdamW(model.parameters(),lr=5e-5,betas=[0.9,0.999],weight_decay=0.01)
trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset.with_format("torch"),
        eval_dataset =valid_dataset.with_format("torch"),
        data_collator=data_collator,
        tokenizer=tokenizer,
        optimizers = (opt,lr_scheduler)
    )

In [None]:
print("One Epoch total steps: {}".format(train_steps / epochs))
train_res = trainer.train()

In [34]:
if print_for_debug:
    print(trainer.state.best_model_checkpoint)

./models/apache_jit/codebert512_msg_base/checkpoint-7050


## Evaluate and Predict

In [35]:
testing=True
if testing:
    test_dataset = test_dataset.map(encode,load_from_cache_file=False, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)  
    trainer.model = AutoModelForSequenceClassification.from_pretrained(trainer.state.best_model_checkpoint).to('cuda') #
    test_res = trainer.evaluate(eval_dataset=test_dataset.with_format("torch"))
    print(test_res)

Map:   0%|          | 0/7526 [00:00<?, ? examples/s]

{'eval_loss': 0.6573278903961182, 'eval_accuracy': 0.7102046239702365, 'eval_f1': 0.5420953180768423, 'eval_precision': 0.3894419306184012, 'eval_recall': 0.8915745856353591, 'eval_matthews_correlation': 0.4435147704359337, 'eval_auc': 0.7792851539562119, 'eval_runtime': 60.5454, 'eval_samples_per_second': 124.303, 'eval_steps_per_second': 15.542, 'epoch': 10.01}


In [None]:
wandb.finish()