In [1]:
import transformers, tokenizers, torch, datasets
from datasets import load_dataset
import os
import glob
import pickle
import pandas as pd

In [2]:
print('transformers={}'.format(transformers.__version__))
print('tokenizers={}'.format(tokenizers.__version__))
print('torch={}'.format(torch.__version__))
print('datasets={}'.format(datasets.__version__))

transformers=4.31.0
tokenizers=0.11.0
torch=1.13.1
datasets=2.10.1


In [3]:
current_directory = os.getcwd()

In [4]:
def defect_percent(df):
    return df['label'].sum()/df.shape[0]

In [None]:
datatest_versions = ['base','full_change','special_tokens']
models_names = ['bilstm','codebert','codereviewer','javabert','codet5p']
fine_tuning_techniques = ['full','partial','lora']

In [None]:
include_message=True
include_metrics=False
use_lora = False
ft_technique = fine_tuning_techniques[1]
lsg_attention = False
copy_embedding_from_model=True
print_for_debug = False
max_commit_code_length = 512
partial_trained_encoders = 3
model_name = models_names[1]
dataset_version_name = datatest_versions[0]

In [None]:
create_lsg_model = False

### load dataset

In [5]:
from datasets import load_dataset
multiple_files = True
data_name = 'apache_jit'
prefix = '<java> '
version = 4

if dataset_version_name == 'special_tokens':
    version = 1
elif dataset_version_name == 'full_change':
    version = 5
else:
    version = 4
if include_metrics:
    columns_to_remove = ['id','msg','code','metrics']
else:
    columns_to_remove = ['id','msg','code']
#dataset_version_name = 'special_tokens' if version == 1 else 'full_change' if version == 5 else 'base'
dataset_version_name

'base'

In [6]:
train_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/train_v{version}_shuffled.csv', streaming=True,split="train")
valid_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/valid_v{version}_balanced.csv',split="train")
test_dataset = load_dataset("csv",data_files=f'datasets/{data_name}/test_v{version}.csv',split="train")
train_df = pd.read_csv(f'datasets/{data_name}/train_v{version}_shuffled.csv')
valid_df = pd.read_csv(f'datasets/{data_name}/valid_v{version}_balanced.csv')
test_df = pd.read_csv(f'datasets/{data_name}/test_v{version}.csv')
train_length = train_df.shape[0]
valid_length = valid_df.shape[0]
test_length  = test_df.shape[0]

Found cached dataset csv (/home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-2b5baf67d918b518/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
Found cached dataset csv (/home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-5c559d330a4271fa/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


In [7]:
valid_df.head()

Unnamed: 0,id,label,msg,code
0,c548a3d335dc4fcc2c1012a5d3f0f956feff7276,0,[FLINK-11549][tests] Remove obsolete ResourceM...,[['<del> package org.apache.flink.runtime.clus...
1,a02e8e0f9fde8372ff0eea0e674f72bb8be15875,1,ZEPPELIN-3876. Unable to rename note\n\n### Wh...,"[['<del> if (isRelative) {', '<add> if (isRela..."
2,98184bd078d7b957f4cf99d26a5aacef1583fe3a,0,[FLINK-12401][table] Support incremental emit ...,[['<add> import org.apache.flink.util.Collecto...
3,045b8da502328e72976d9e3aeb79a50090596bda,1,HIVE-16642 : New Events created as part of rep...,[['<add> package org.apache.hive.hcatalog.api....
4,2f1b3eab675ac327a6f61b724d5f0bce01ec6e68,1,HBASE-19998 Flakey TestVisibilityLabelsWithDef...,"[['<add> LOG.info(""REMOVE"", new Throwable(""REM..."


In [8]:
if model_name == 'bilstm'
    model_checkpoint = 'microsoft/codereviewer'
elif model_name == 'codebert':
    model_checkpoint = 'microsoft/codebert-base'
elif model_name == 'javabert':
    model_checkpoint = 'CAUKiel/JavaBERT'
elif model_name == 'codereviewer':
    model_checkpoint = 'microsoft/codereviewer'
else:
    model_checkpoint = 'Salesforce/codet5p-220m'
if lsg_attention:
    if os.path.exists("{}_lsg_{}".format(model_name,max_commit_code_length)):
        model_checkpoint = "{}_lsg_{}".format(model_name,max_commit_code_length)
    else:
        create_lsg_model = True
model_name_suffix = model_name + '_{}{}{}{}_{}'.format(max_commit_code_length,'_msg' if include_message else '','_mtc' if include_metrics else '', '_lsg' if lsg_attention else '', '_lora' if use_lora else '',dataset_version_name) 

### Tokenize the train and test data

In [9]:
if model_name == 'javabert':
    cls_token = '[CLS]'
    sep_token = '[SEP]'
    msg_token = '<msg>'
    metrics_token = '[CLS]'
    code_change_token = '[CLS]'
else:
    cls_token = '<s>'
    sep_token = '</s>'
    msg_token = '<msg>'
    metrics_token = '<s>''
    code_change_token = '<s>'
if dataset_version_name == 'special_tokens':
    added_token = '<added>'
    removed_token = '<removed>'
else:
    added_token = '<add>'
    removed_token = '<del>'
prefix = ''

In [None]:
def add_tokens_to_tokenizer(tokenizer):
    if dataset_version_name == 'special_tokens':
        tokenizer.add_special_tokens({'additional_special_tokens':[added_token, removed_token,'<STR>','<NUM>']})
    if include_message:
        tokenizer.add_special_tokens({'additional_special_tokens':[cls_token, sep_token,'<pad>', '<unk>',added_token, removed_token,msg_token]})
    else:
        tokenizer.add_special_tokens({'additional_special_tokens':[cls_token, sep_token,'<pad>', '<unk>',added_token, removed_token]})

In [None]:
if create_lsg_model:
    from lsg_converter import LSGConverter
    from transformers import AutoModelForSequenceClassification
    converter = LSGConverter(max_sequence_length=max_commit_code_length)
    if model_name == 'javabert':
        architecture = 'RobertaForSequenceClassification'
    elif model_name == 'codebert':
        architecture = 'RobertaForSequenceClassification'
    else:
        print('Error! LSG Attention not supported for T5 models at the moment. (CodeReviewer and CodeT5+)')
        exit()
    model, tokenizer = converter.convert_from_pretrained(model_checkpoint,dropout=0.2,hidden_dropout_prob=0.2,num_labels=2,architecture=architecture)
    add_tokens_to_tokenizer(tokenizer)
    model.resize_token_embeddings(len(tokenizer))
    save_path = '{}_lsg_{}'.format(model_name,max_commit_code_length)
    model_checkpoint = save_path
    model.save_pretrained(save_path)
    tokenizer.save_pretrained(save_path)

In [10]:
from transformers import AutoTokenizer
from bi_lstm import BiLSTM
if model_name == 'bilstm':
    if copy_embedding_from_model:
        tokenizer = AutoTokenizer.from_pretrained( model_checkpoint)
    else:
        from transformers import RobertaTokenizerFast
        tokenizer_name = '{}_{}_bpe'.format(data_name,vocab_size)
        tokenizer = RobertaTokenizerFast.from_pretrained('./BPE_tokenizer/{}'.format(tokenizer_name),max_len=max_commit_code_length)
else:
    tokenizer = AutoTokenizer.from_pretrained( model_checkpoint)
add_tokens_to_tokenizer(tokenizer)

In [13]:
if print_for_debug:
    print(len(tokenizer))

{'input_ids': [0, 2, 2], 'attention_mask': [1, 1, 1]}

In [16]:
def remove_empty(seq):
    return list(filter(lambda s: s != None and s != '',seq))


def join_commit_codes_sep(commit,commit_start=' <NFILE> ',file_sep=' <NFILE> ',line_sep=' <NLINE> '):
    if type(commit) == str:
        commit = eval(commit)
    #return commit_start + file_sep.join(remove_empty([line_sep.join([correct_token(line.split(' ')[0]) +' ' + ' '.join(line.split(' ')[1:]) for line in file]) for file in commit]))
    return commit_start + file_sep.join(remove_empty([line_sep.join(file) for file in commit]))

def join_commit_codes(commit):
    return join_commit_codes_sep(commit,prefix ,f' {sep_token} ','\n')
    #return join_commit_codes_sep(commit,cls_token+' ',f' {sep_token} ',' ')
def join_file_lines(file):
    if type(file) != list:
        file = eval(file)
    return prefix +  '\n'.join(file)

def empty_join_commit_codes(commit):
    return join_commit_codes_sep(commit,'','')
def join_lines(lines,commit_start=' <NFILE> ',line_sep=' <NLINE> '):
    return commit_start + line_sep.join(lines)
def flatten(l):
    return [item for sublist in l for item in sublist]
def join_commit_msg_and_code(msg,code):
    if msg is None:
        msg = ''
    if code is None:
        code = ''
    return msg_token + ' ' + msg.split('\n')[0] + ' ' + code_change_token + ' ' + join_commit_codes_sep(code ,'',f' {sep_token} ','\n')
def join_commit_msg_metrics_code(msg,mtc,code):
    if msg is None:
        msg = ''
    if code is None:
        code = ''
    if mtc is None:
        mtc = ''
    return msg_token + ' ' + msg.split('\n')[0] + '\n' + metrics_token + ' ' + mtc + '\n' + code_change_token + ' ' + join_commit_codes_sep(code ,'',f' {sep_token} ','\n')

In [17]:
def encode(batch):
    if multiple_files:
        if include_message:
            if include_metrics:
                inputs = tokenizer(list(map(join_commit_msg_metrics_code,batch['msg'],batch['metrics'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
            else:
                inputs = tokenizer(list(map(join_commit_msg_and_code,batch['msg'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
        elif include_metrics:
            inputs = tokenizer(list(map(join_commit_msg_and_code,batch['metrics'],batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
        else:
            inputs = tokenizer(list(map(join_commit_codes,batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
    else:    
        inputs = tokenizer(list(map(join_file_lines,batch['code'])),truncation="longest_first",max_length=max_commit_code_length)
    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask
    return batch

In [18]:
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer,max_length=max_commit_code_length,padding='longest')

2023-09-25 15:18:46.435917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Model classes and initialization


In [19]:
device = 'cuda'

In [20]:
import torch
import torch.nn as nn
from transformers import AutoModel,AutoModelForSequenceClassification, AutoConfig
if model_name == 'bilstm':
    model = BiLSTM(len(tokenizer),embed_size=768,hidden_size=64,lstm_layers=4,dropout=0.2,padding_id=tokenizer.pad_token_id)
    if copy_embedding_from_model:
        copy_from_model = AutoModel.from_pretrained(model_checkpoint)
        with torch.no_grad():
            model.embedding.weight.copy_(copy_from_model.encoder.embed_tokens.weight)
            model.embedding.require_grad = False
else:
    config = AutoConfig.from_pretrained(model_checkpoint)
    config.hidden_dropout_prob = 0.2
    config.dropout = 0.2
    config.num_labels=2
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint,config=config)

    #change embedding layer size to match tokenizer vocabulary size (Because we added new tokens to the tokenizer):
    model.resize_token_embeddings(len(tokenizer))

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(50268, 768)

In [21]:
if print_for_debug:
    print(model)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50268, 768)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.2, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm

In [22]:
if ft_technique == 'lora':
    from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias="all"
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
elif ft_technique == 'partial':
    # freeze encoder layers except the last 2 layers
    modules = []
    trained_encoder_layers = partial_trained_encoders
    if model_name == 'codebert':
        modules = [model.roberta.embeddings, *model.roberta.encoder.layer[:-trained_encoder_layers]]
    elif model_name == 'javabert' 
        modules = [model.bert.embeddings, *model.bert.encoder.layer[:-trained_encoder_layers]]
    elif model_name == 'codereviewer' or model_name == 'codet5p':
        modules = [model.shared, *model.encoder.block[:-trained_encoder_layers]]
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False

In [24]:
def param_count(model,trainable_only=True):
    return sum([p.numel()for p in model.parameters() if p.requires_grad or not trainable_only])
if print_for_debug:
    print(param_count(model))
    print(param_count(model,False))

14767874
124649474


In [25]:
import numpy as np
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score, matthews_corrcoef
def roc_auc(preds,target):
    roc_auc_score(target, preds)

def compute_metrics(p):
    _predictions = p.predictions
    _labels = p.label_ids
    _predictions = np.argmax(_predictions, axis=-1)
    vals = {}
    vals['accuracy'] = accuracy_score(_labels, _predictions)
    vals['f1'] = f1_score(_labels, _predictions)
    vals['precision'] = precision_score(_labels, _predictions)
    vals['recall'] = recall_score(_labels, _predictions)
    vals['matthews_correlation'] = matthews_corrcoef(_labels, _predictions)
    vals['auc'] = roc_auc_score(_labels, _predictions.reshape(-1,1))
    return  vals

In [26]:
from transformers.optimization import AdamW
from transformers import Trainer,get_linear_schedule_with_warmup
from math import ceil
init_lr,head_lr = 5e-4 ,1e-4
adam_eps = 1e-6
weight_decay = 0.01
epochs=10
batch_size = 8
gradient_accumulation_steps = 8
batch_steps = int(train_length/(batch_size*gradient_accumulation_steps))
#rem_steps = train_length%(batch_size*gradient_accumulation_steps)
rem_steps = ceil((train_length%(batch_size*gradient_accumulation_steps)) / batch_size)
train_steps = (epochs) * (batch_steps + rem_steps)
warmpup_factor = 0.25
warmpup_steps = int(train_steps*warmpup_factor)
optim = 'adafactor'


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/host2-virtualmachine1/anaconda3/envs/jit_sdp/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /home/host2-virtualmachine1/anaconda3/envs/jit_sdp/lib/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/host2-virtualmachine1/anaconda3/envs/jit_sdp/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [27]:
train_dataset = train_dataset.map(encode, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)
valid_dataset = valid_dataset.map(encode, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)

Loading cached processed dataset at /home/host2-virtualmachine1/.cache/huggingface/datasets/csv/default-2b5baf67d918b518/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317/cache-f32eac4c6c9bd6ad.arrow


In [28]:
if print_for_debug:
    for v in range(3):
        print(valid_dataset['input_ids'][v])
        print(valid_dataset['label'][v])
        #print(valid_dataset['attention_mask'][v])
        print(tokenizer.decode(valid_dataset['input_ids'][v]))

[0, 50267, 646, 7613, 23617, 12, 15314, 3414, 46386, 47173, 742, 27336, 29707, 13877, 44854, 2068, 38834, 1437, 0, 1437, 50266, 3737, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3998, 10504, 48019, 131, 50118, 50266, 6595, 18735, 2348, 4, 24625, 4, 40448, 36383, 131, 50118, 50266, 6595, 18735, 2348, 4, 21959, 23199, 4, 32379, 34603, 29233, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 43163, 27975, 4, 49602, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 677, 2348, 4, 33282, 2348, 41967, 5290, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3998, 10504, 48019, 4, 41817, 4, 47279, 2688, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3530, 39798, 4, 18522, 49054, 39868, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 3530, 39798, 4, 13424, 1999, 4, 35804, 17452, 4, 42578, 42495, 24017, 39868, 131, 50118, 50266, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 49600, 4, 48768, 4, 40448, 37155,

[0, 50267, 646, 7613, 23617, 12, 22960, 2663, 46386, 14595, 742, 7737, 16548, 29901, 223, 5438, 27814, 15664, 5745, 13, 786, 12, 42996, 5230, 19920, 46498, 41101, 15, 9513, 21013, 4, 1437, 0, 1437, 50265, 6595, 31118, 4, 48530, 4, 4825, 4291, 4, 32843, 4, 44252, 368, 131, 50118, 50266, 1009, 1437, 1437, 1437, 1437, 28696, 3572, 15698, 991, 405, 33977, 49138, 3572, 15698, 50118, 50265, 1009, 1437, 1437, 1437, 1437, 28696, 3572, 15698, 991, 405, 33977, 50, 29901, 39962, 3908, 27814, 15664, 49138, 3572, 15698, 50118, 50265, 1009, 28696, 5234, 15698, 50118, 50265, 1009, 25522, 1039, 20414, 50118, 50265, 1009, 28277, 358, 86, 77, 41, 40796, 898, 197, 28, 1468, 1538, 4, 20, 1835, 923, 115, 50118, 50265, 1009, 28, 1169, 41, 419, 8, 20044, 898, 36, 28030, 3435, 37141, 25, 414, 5240, 43, 50, 5, 507, 50118, 50265, 1009, 898, 9, 5, 40796, 4, 50118, 50265, 1009, 50118, 50265, 1009, 22248, 31, 29901, 33977, 6, 29901, 39962, 3908, 27814, 15664, 16, 341, 7, 29901, 3266, 14, 33, 57, 4752, 4, 50118, 50

In [29]:
from transformers import TrainingArguments
training_args = TrainingArguments(
    output_dir = './models/{}/{}'.format(data_name,model_name_suffix),
    num_train_epochs = epochs,
    per_device_train_batch_size = batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    per_device_eval_batch_size= batch_size,
    save_total_limit = 2,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model ='auc',
    evaluation_strategy = "epoch",
    eval_steps=4,
    disable_tqdm = False,
    warmup_steps=warmpup_steps,
    logging_steps = 4,
    remove_unused_columns=False,
    report_to="wandb",
    fp16 = False,
    logging_dir= './models/{}/{}/logs/'.format(data_name,model_name_suffix),
    dataloader_num_workers = 0,
    max_steps=train_steps)

In [30]:
from transformers.optimization import Adafactor

if optim == 'adamw':
    opt = torch.optim.AdamW(model.parameters(),lr=init_lr,betas=(0.9, 0.999), eps=adam_eps, weight_decay=weight_decay)
elif optim == 'adafactor':
    opt = Adafactor(model.parameters(), lr=init_lr, relative_step=False, warmup_init=False)
scheduling_types = ['warmpup_anneal', 'warmup','constant']
scheduling_type = scheduling_types[1]

if scheduling_type == scheduling_types[0]:
    lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=opt,
        max_lr=init_lr,
        pct_start=training_args.warmup_steps / training_args.max_steps,
        anneal_strategy="linear",
        total_steps=training_args.max_steps
    )
elif scheduling_type == scheduling_types[1]:
    lr_scheduler = transformers.get_constant_schedule_with_warmup(opt,training_args.warmup_steps)
else:
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(opt,lambda epoch: init_lr)

In [31]:
import wandb
# Login with your authentication key

wandb.login()
training_hyper_params = {
    "model_name": model_name_suffix,
    "optimizer": optim,
    "base_lr": init_lr,
    "weight_decay": weight_decay,
    "warmpup_factor":warmpup_factor,
    "warmpup_steps": warmpup_steps,
    "batch_size": batch_size,
    "gradient_accumulation_steps": gradient_accumulation_steps,
    "seq_len": max_commit_code_length,
    "epochs": epochs,
    "include_commit_msg":include_message,
    "trained_encoder_layers":trained_encoder_layers if not use_lora else -1,
    "dataset_version":version
}
wandb.init(project=data_name,name='{}/{}'.format(data_name,model_name_suffix),config=training_hyper_params)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[34m[1mwandb[0m: Currently logged in as: [33myaseralosh[0m ([33mjit_defect[0m). Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [32]:
#adam_opt = AdamW(model.parameters(),lr=5e-5,betas=[0.9,0.999],weight_decay=0.01)
trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset.with_format("torch"),
        eval_dataset =valid_dataset.with_format("torch"),
        data_collator=data_collator,
        tokenizer=tokenizer,
        optimizers = (opt,lr_scheduler)
    )

In [33]:
print("One Epoch total steps: {}".format(train_steps / epochs))
train_res = trainer.train()

One Epoch total steps: 705.0
Steps in epoch: 56400


  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Optimizer step: 7


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Matthews Correlation,Auc
0,0.6404,0.634895,0.649712,0.712817,0.604004,0.869452,0.33334,0.649712
1,0.5874,0.631022,0.704035,0.753717,0.64538,0.905764,0.445978,0.704035
2,0.5486,0.696978,0.71902,0.77193,0.649606,0.951009,0.494487,0.71902
3,0.5068,0.616584,0.745821,0.787367,0.676751,0.94121,0.534113,0.745821
4,0.4771,0.624268,0.742363,0.786227,0.671843,0.94755,0.531546,0.742363
5,0.4545,0.563263,0.761527,0.796358,0.694868,0.932565,0.556635,0.761527
6,0.4683,0.57014,0.755764,0.792661,0.688629,0.933718,0.547368,0.755764
7,0.4825,0.591152,0.750432,0.789653,0.68241,0.936888,0.539802,0.750432
8,0.4522,0.532951,0.773775,0.800102,0.716697,0.905476,0.567594,0.773775
9,0.4611,0.580399,0.758213,0.792686,0.693772,0.924496,0.547595,0.758213


Optimizer step: 15
Optimizer step: 23
Optimizer step: 31
Optimizer step: 39
Optimizer step: 47
Optimizer step: 55
Optimizer step: 63
Optimizer step: 71
Optimizer step: 79
Optimizer step: 87
Optimizer step: 95
Optimizer step: 103
Optimizer step: 111
Optimizer step: 119
Optimizer step: 127
Optimizer step: 135
Optimizer step: 143
Optimizer step: 151
Optimizer step: 159
Optimizer step: 167
Optimizer step: 175
Optimizer step: 183
Optimizer step: 191
Optimizer step: 199
Optimizer step: 207
Optimizer step: 215
Optimizer step: 223
Optimizer step: 231
Optimizer step: 239
Optimizer step: 247
Optimizer step: 255
Optimizer step: 263
Optimizer step: 271
Optimizer step: 279
Optimizer step: 287
Optimizer step: 295
Optimizer step: 303
Optimizer step: 311
Optimizer step: 319
Optimizer step: 327
Optimizer step: 335
Optimizer step: 343
Optimizer step: 351
Optimizer step: 359
Optimizer step: 367
Optimizer step: 375
Optimizer step: 383
Optimizer step: 391
Optimizer step: 399
Optimizer step: 407
Optimizer s

Optimizer step: 3191
Optimizer step: 3199
Optimizer step: 3207
Optimizer step: 3215
Optimizer step: 3223
Optimizer step: 3231
Optimizer step: 3239
Optimizer step: 3247
Optimizer step: 3255
Optimizer step: 3263
Optimizer step: 3271
Optimizer step: 3279
Optimizer step: 3287
Optimizer step: 3295
Optimizer step: 3303
Optimizer step: 3311
Optimizer step: 3319
Optimizer step: 3327
Optimizer step: 3335
Optimizer step: 3343
Optimizer step: 3351
Optimizer step: 3359
Optimizer step: 3367
Optimizer step: 3375
Optimizer step: 3383
Optimizer step: 3391
Optimizer step: 3399
Optimizer step: 3407
Optimizer step: 3415
Optimizer step: 3423
Optimizer step: 3431
Optimizer step: 3439
Optimizer step: 3447
Optimizer step: 3455
Optimizer step: 3463
Optimizer step: 3471
Optimizer step: 3479
Optimizer step: 3487
Optimizer step: 3495
Optimizer step: 3503
Optimizer step: 3511
Optimizer step: 3519
Optimizer step: 3527
Optimizer step: 3535
Optimizer step: 3543
Optimizer step: 3551
Optimizer step: 3559
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 2
Optimizer step: 10
Optimizer step: 18
Optimizer step: 26
Optimizer step: 34
Optimizer step: 42
Optimizer step: 50
Optimizer step: 58
Optimizer step: 66
Optimizer step: 74
Optimizer step: 82
Optimizer step: 90
Optimizer step: 98
Optimizer step: 106
Optimizer step: 114
Optimizer step: 122
Optimizer step: 130
Optimizer step: 138
Optimizer step: 146
Optimizer step: 154
Optimizer step: 162
Optimizer step: 170
Optimizer step: 178
Optimizer step: 186
Optimizer step: 194
Optimizer step: 202
Optimizer step: 210
Optimizer step: 218
Optimizer step: 226
Optimizer step: 234
Optimizer step: 242
Optimizer step: 250
Optimizer step: 258
Optimizer step: 266
Optimizer step: 274
Optimizer step: 282
Optimizer step: 290
Optimizer step: 298
Optimizer step: 306
Optimizer step: 314
Optimizer step: 322
Optimizer step: 330
Optimizer step: 338
Optimizer step: 346
Optimizer step: 354
Optimizer step: 362
Optimizer step: 370
Optimizer step: 378
Optimizer step: 386
Optimizer step: 394
Optimizer step

Optimizer step: 3178
Optimizer step: 3186
Optimizer step: 3194
Optimizer step: 3202
Optimizer step: 3210
Optimizer step: 3218
Optimizer step: 3226
Optimizer step: 3234
Optimizer step: 3242
Optimizer step: 3250
Optimizer step: 3258
Optimizer step: 3266
Optimizer step: 3274
Optimizer step: 3282
Optimizer step: 3290
Optimizer step: 3298
Optimizer step: 3306
Optimizer step: 3314
Optimizer step: 3322
Optimizer step: 3330
Optimizer step: 3338
Optimizer step: 3346
Optimizer step: 3354
Optimizer step: 3362
Optimizer step: 3370
Optimizer step: 3378
Optimizer step: 3386
Optimizer step: 3394
Optimizer step: 3402
Optimizer step: 3410
Optimizer step: 3418
Optimizer step: 3426
Optimizer step: 3434
Optimizer step: 3442
Optimizer step: 3450
Optimizer step: 3458
Optimizer step: 3466
Optimizer step: 3474
Optimizer step: 3482
Optimizer step: 3490
Optimizer step: 3498
Optimizer step: 3506
Optimizer step: 3514
Optimizer step: 3522
Optimizer step: 3530
Optimizer step: 3538
Optimizer step: 3546
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 5
Optimizer step: 13
Optimizer step: 21
Optimizer step: 29
Optimizer step: 37
Optimizer step: 45
Optimizer step: 53
Optimizer step: 61
Optimizer step: 69
Optimizer step: 77
Optimizer step: 85
Optimizer step: 93
Optimizer step: 101
Optimizer step: 109
Optimizer step: 117
Optimizer step: 125
Optimizer step: 133
Optimizer step: 141
Optimizer step: 149
Optimizer step: 157
Optimizer step: 165
Optimizer step: 173
Optimizer step: 181
Optimizer step: 189
Optimizer step: 197
Optimizer step: 205
Optimizer step: 213
Optimizer step: 221
Optimizer step: 229
Optimizer step: 237
Optimizer step: 245
Optimizer step: 253
Optimizer step: 261
Optimizer step: 269
Optimizer step: 277
Optimizer step: 285
Optimizer step: 293
Optimizer step: 301
Optimizer step: 309
Optimizer step: 317
Optimizer step: 325
Optimizer step: 333
Optimizer step: 341
Optimizer step: 349
Optimizer step: 357
Optimizer step: 365
Optimizer step: 373
Optimizer step: 381
Optimizer step: 389
Optimizer step: 397
Optimizer ste

Optimizer step: 3181
Optimizer step: 3189
Optimizer step: 3197
Optimizer step: 3205
Optimizer step: 3213
Optimizer step: 3221
Optimizer step: 3229
Optimizer step: 3237
Optimizer step: 3245
Optimizer step: 3253
Optimizer step: 3261
Optimizer step: 3269
Optimizer step: 3277
Optimizer step: 3285
Optimizer step: 3293
Optimizer step: 3301
Optimizer step: 3309
Optimizer step: 3317
Optimizer step: 3325
Optimizer step: 3333
Optimizer step: 3341
Optimizer step: 3349
Optimizer step: 3357
Optimizer step: 3365
Optimizer step: 3373
Optimizer step: 3381
Optimizer step: 3389
Optimizer step: 3397
Optimizer step: 3405
Optimizer step: 3413
Optimizer step: 3421
Optimizer step: 3429
Optimizer step: 3437
Optimizer step: 3445
Optimizer step: 3453
Optimizer step: 3461
Optimizer step: 3469
Optimizer step: 3477
Optimizer step: 3485
Optimizer step: 3493
Optimizer step: 3501
Optimizer step: 3509
Optimizer step: 3517
Optimizer step: 3525
Optimizer step: 3533
Optimizer step: 3541
Optimizer step: 3549
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 0
Optimizer step: 8
Optimizer step: 16
Optimizer step: 24
Optimizer step: 32
Optimizer step: 40
Optimizer step: 48
Optimizer step: 56
Optimizer step: 64
Optimizer step: 72
Optimizer step: 80
Optimizer step: 88
Optimizer step: 96
Optimizer step: 104
Optimizer step: 112
Optimizer step: 120
Optimizer step: 128
Optimizer step: 136
Optimizer step: 144
Optimizer step: 152
Optimizer step: 160
Optimizer step: 168
Optimizer step: 176
Optimizer step: 184
Optimizer step: 192
Optimizer step: 200
Optimizer step: 208
Optimizer step: 216
Optimizer step: 224
Optimizer step: 232
Optimizer step: 240
Optimizer step: 248
Optimizer step: 256
Optimizer step: 264
Optimizer step: 272
Optimizer step: 280
Optimizer step: 288
Optimizer step: 296
Optimizer step: 304
Optimizer step: 312
Optimizer step: 320
Optimizer step: 328
Optimizer step: 336
Optimizer step: 344
Optimizer step: 352
Optimizer step: 360
Optimizer step: 368
Optimizer step: 376
Optimizer step: 384
Optimizer step: 392
Optimizer step:

Optimizer step: 3176
Optimizer step: 3184
Optimizer step: 3192
Optimizer step: 3200
Optimizer step: 3208
Optimizer step: 3216
Optimizer step: 3224
Optimizer step: 3232
Optimizer step: 3240
Optimizer step: 3248
Optimizer step: 3256
Optimizer step: 3264
Optimizer step: 3272
Optimizer step: 3280
Optimizer step: 3288
Optimizer step: 3296
Optimizer step: 3304
Optimizer step: 3312
Optimizer step: 3320
Optimizer step: 3328
Optimizer step: 3336
Optimizer step: 3344
Optimizer step: 3352
Optimizer step: 3360
Optimizer step: 3368
Optimizer step: 3376
Optimizer step: 3384
Optimizer step: 3392
Optimizer step: 3400
Optimizer step: 3408
Optimizer step: 3416
Optimizer step: 3424
Optimizer step: 3432
Optimizer step: 3440
Optimizer step: 3448
Optimizer step: 3456
Optimizer step: 3464
Optimizer step: 3472
Optimizer step: 3480
Optimizer step: 3488
Optimizer step: 3496
Optimizer step: 3504
Optimizer step: 3512
Optimizer step: 3520
Optimizer step: 3528
Optimizer step: 3536
Optimizer step: 3544
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 3
Optimizer step: 11
Optimizer step: 19
Optimizer step: 27
Optimizer step: 35
Optimizer step: 43
Optimizer step: 51
Optimizer step: 59
Optimizer step: 67
Optimizer step: 75
Optimizer step: 83
Optimizer step: 91
Optimizer step: 99
Optimizer step: 107
Optimizer step: 115
Optimizer step: 123
Optimizer step: 131
Optimizer step: 139
Optimizer step: 147
Optimizer step: 155
Optimizer step: 163
Optimizer step: 171
Optimizer step: 179
Optimizer step: 187
Optimizer step: 195
Optimizer step: 203
Optimizer step: 211
Optimizer step: 219
Optimizer step: 227
Optimizer step: 235
Optimizer step: 243
Optimizer step: 251
Optimizer step: 259
Optimizer step: 267
Optimizer step: 275
Optimizer step: 283
Optimizer step: 291
Optimizer step: 299
Optimizer step: 307
Optimizer step: 315
Optimizer step: 323
Optimizer step: 331
Optimizer step: 339
Optimizer step: 347
Optimizer step: 355
Optimizer step: 363
Optimizer step: 371
Optimizer step: 379
Optimizer step: 387
Optimizer step: 395
Optimizer step

Optimizer step: 3179
Optimizer step: 3187
Optimizer step: 3195
Optimizer step: 3203
Optimizer step: 3211
Optimizer step: 3219
Optimizer step: 3227
Optimizer step: 3235
Optimizer step: 3243
Optimizer step: 3251
Optimizer step: 3259
Optimizer step: 3267
Optimizer step: 3275
Optimizer step: 3283
Optimizer step: 3291
Optimizer step: 3299
Optimizer step: 3307
Optimizer step: 3315
Optimizer step: 3323
Optimizer step: 3331
Optimizer step: 3339
Optimizer step: 3347
Optimizer step: 3355
Optimizer step: 3363
Optimizer step: 3371
Optimizer step: 3379
Optimizer step: 3387
Optimizer step: 3395
Optimizer step: 3403
Optimizer step: 3411
Optimizer step: 3419
Optimizer step: 3427
Optimizer step: 3435
Optimizer step: 3443
Optimizer step: 3451
Optimizer step: 3459
Optimizer step: 3467
Optimizer step: 3475
Optimizer step: 3483
Optimizer step: 3491
Optimizer step: 3499
Optimizer step: 3507
Optimizer step: 3515
Optimizer step: 3523
Optimizer step: 3531
Optimizer step: 3539
Optimizer step: 3547
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 6
Optimizer step: 14
Optimizer step: 22
Optimizer step: 30
Optimizer step: 38
Optimizer step: 46
Optimizer step: 54
Optimizer step: 62
Optimizer step: 70
Optimizer step: 78
Optimizer step: 86
Optimizer step: 94
Optimizer step: 102
Optimizer step: 110
Optimizer step: 118
Optimizer step: 126
Optimizer step: 134
Optimizer step: 142
Optimizer step: 150
Optimizer step: 158
Optimizer step: 166
Optimizer step: 174
Optimizer step: 182
Optimizer step: 190
Optimizer step: 198
Optimizer step: 206
Optimizer step: 214
Optimizer step: 222
Optimizer step: 230
Optimizer step: 238
Optimizer step: 246
Optimizer step: 254
Optimizer step: 262
Optimizer step: 270
Optimizer step: 278
Optimizer step: 286
Optimizer step: 294
Optimizer step: 302
Optimizer step: 310
Optimizer step: 318
Optimizer step: 326
Optimizer step: 334
Optimizer step: 342
Optimizer step: 350
Optimizer step: 358
Optimizer step: 366
Optimizer step: 374
Optimizer step: 382
Optimizer step: 390
Optimizer step: 398
Optimizer ste

Optimizer step: 3182
Optimizer step: 3190
Optimizer step: 3198
Optimizer step: 3206
Optimizer step: 3214
Optimizer step: 3222
Optimizer step: 3230
Optimizer step: 3238
Optimizer step: 3246
Optimizer step: 3254
Optimizer step: 3262
Optimizer step: 3270
Optimizer step: 3278
Optimizer step: 3286
Optimizer step: 3294
Optimizer step: 3302
Optimizer step: 3310
Optimizer step: 3318
Optimizer step: 3326
Optimizer step: 3334
Optimizer step: 3342
Optimizer step: 3350
Optimizer step: 3358
Optimizer step: 3366
Optimizer step: 3374
Optimizer step: 3382
Optimizer step: 3390
Optimizer step: 3398
Optimizer step: 3406
Optimizer step: 3414
Optimizer step: 3422
Optimizer step: 3430
Optimizer step: 3438
Optimizer step: 3446
Optimizer step: 3454
Optimizer step: 3462
Optimizer step: 3470
Optimizer step: 3478
Optimizer step: 3486
Optimizer step: 3494
Optimizer step: 3502
Optimizer step: 3510
Optimizer step: 3518
Optimizer step: 3526
Optimizer step: 3534
Optimizer step: 3542
Optimizer step: 3550
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 1
Optimizer step: 9
Optimizer step: 17
Optimizer step: 25
Optimizer step: 33
Optimizer step: 41
Optimizer step: 49
Optimizer step: 57
Optimizer step: 65
Optimizer step: 73
Optimizer step: 81
Optimizer step: 89
Optimizer step: 97
Optimizer step: 105
Optimizer step: 113
Optimizer step: 121
Optimizer step: 129
Optimizer step: 137
Optimizer step: 145
Optimizer step: 153
Optimizer step: 161
Optimizer step: 169
Optimizer step: 177
Optimizer step: 185
Optimizer step: 193
Optimizer step: 201
Optimizer step: 209
Optimizer step: 217
Optimizer step: 225
Optimizer step: 233
Optimizer step: 241
Optimizer step: 249
Optimizer step: 257
Optimizer step: 265
Optimizer step: 273
Optimizer step: 281
Optimizer step: 289
Optimizer step: 297
Optimizer step: 305
Optimizer step: 313
Optimizer step: 321
Optimizer step: 329
Optimizer step: 337
Optimizer step: 345
Optimizer step: 353
Optimizer step: 361
Optimizer step: 369
Optimizer step: 377
Optimizer step: 385
Optimizer step: 393
Optimizer step:

Optimizer step: 3177
Optimizer step: 3185
Optimizer step: 3193
Optimizer step: 3201
Optimizer step: 3209
Optimizer step: 3217
Optimizer step: 3225
Optimizer step: 3233
Optimizer step: 3241
Optimizer step: 3249
Optimizer step: 3257
Optimizer step: 3265
Optimizer step: 3273
Optimizer step: 3281
Optimizer step: 3289
Optimizer step: 3297
Optimizer step: 3305
Optimizer step: 3313
Optimizer step: 3321
Optimizer step: 3329
Optimizer step: 3337
Optimizer step: 3345
Optimizer step: 3353
Optimizer step: 3361
Optimizer step: 3369
Optimizer step: 3377
Optimizer step: 3385
Optimizer step: 3393
Optimizer step: 3401
Optimizer step: 3409
Optimizer step: 3417
Optimizer step: 3425
Optimizer step: 3433
Optimizer step: 3441
Optimizer step: 3449
Optimizer step: 3457
Optimizer step: 3465
Optimizer step: 3473
Optimizer step: 3481
Optimizer step: 3489
Optimizer step: 3497
Optimizer step: 3505
Optimizer step: 3513
Optimizer step: 3521
Optimizer step: 3529
Optimizer step: 3537
Optimizer step: 3545
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 4
Optimizer step: 12
Optimizer step: 20
Optimizer step: 28
Optimizer step: 36
Optimizer step: 44
Optimizer step: 52
Optimizer step: 60
Optimizer step: 68
Optimizer step: 76
Optimizer step: 84
Optimizer step: 92
Optimizer step: 100
Optimizer step: 108
Optimizer step: 116
Optimizer step: 124
Optimizer step: 132
Optimizer step: 140
Optimizer step: 148
Optimizer step: 156
Optimizer step: 164
Optimizer step: 172
Optimizer step: 180
Optimizer step: 188
Optimizer step: 196
Optimizer step: 204
Optimizer step: 212
Optimizer step: 220
Optimizer step: 228
Optimizer step: 236
Optimizer step: 244
Optimizer step: 252
Optimizer step: 260
Optimizer step: 268
Optimizer step: 276
Optimizer step: 284
Optimizer step: 292
Optimizer step: 300
Optimizer step: 308
Optimizer step: 316
Optimizer step: 324
Optimizer step: 332
Optimizer step: 340
Optimizer step: 348
Optimizer step: 356
Optimizer step: 364
Optimizer step: 372
Optimizer step: 380
Optimizer step: 388
Optimizer step: 396
Optimizer ste

Optimizer step: 3180
Optimizer step: 3188
Optimizer step: 3196
Optimizer step: 3204
Optimizer step: 3212
Optimizer step: 3220
Optimizer step: 3228
Optimizer step: 3236
Optimizer step: 3244
Optimizer step: 3252
Optimizer step: 3260
Optimizer step: 3268
Optimizer step: 3276
Optimizer step: 3284
Optimizer step: 3292
Optimizer step: 3300
Optimizer step: 3308
Optimizer step: 3316
Optimizer step: 3324
Optimizer step: 3332
Optimizer step: 3340
Optimizer step: 3348
Optimizer step: 3356
Optimizer step: 3364
Optimizer step: 3372
Optimizer step: 3380
Optimizer step: 3388
Optimizer step: 3396
Optimizer step: 3404
Optimizer step: 3412
Optimizer step: 3420
Optimizer step: 3428
Optimizer step: 3436
Optimizer step: 3444
Optimizer step: 3452
Optimizer step: 3460
Optimizer step: 3468
Optimizer step: 3476
Optimizer step: 3484
Optimizer step: 3492
Optimizer step: 3500
Optimizer step: 3508
Optimizer step: 3516
Optimizer step: 3524
Optimizer step: 3532
Optimizer step: 3540
Optimizer step: 3548
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 7
Optimizer step: 15
Optimizer step: 23
Optimizer step: 31
Optimizer step: 39
Optimizer step: 47
Optimizer step: 55
Optimizer step: 63
Optimizer step: 71
Optimizer step: 79
Optimizer step: 87
Optimizer step: 95
Optimizer step: 103
Optimizer step: 111
Optimizer step: 119
Optimizer step: 127
Optimizer step: 135
Optimizer step: 143
Optimizer step: 151
Optimizer step: 159
Optimizer step: 167
Optimizer step: 175
Optimizer step: 183
Optimizer step: 191
Optimizer step: 199
Optimizer step: 207
Optimizer step: 215
Optimizer step: 223
Optimizer step: 231
Optimizer step: 239
Optimizer step: 247
Optimizer step: 255
Optimizer step: 263
Optimizer step: 271
Optimizer step: 279
Optimizer step: 287
Optimizer step: 295
Optimizer step: 303
Optimizer step: 311
Optimizer step: 319
Optimizer step: 327
Optimizer step: 335
Optimizer step: 343
Optimizer step: 351
Optimizer step: 359
Optimizer step: 367
Optimizer step: 375
Optimizer step: 383
Optimizer step: 391
Optimizer step: 399
Optimizer ste

Optimizer step: 3183
Optimizer step: 3191
Optimizer step: 3199
Optimizer step: 3207
Optimizer step: 3215
Optimizer step: 3223
Optimizer step: 3231
Optimizer step: 3239
Optimizer step: 3247
Optimizer step: 3255
Optimizer step: 3263
Optimizer step: 3271
Optimizer step: 3279
Optimizer step: 3287
Optimizer step: 3295
Optimizer step: 3303
Optimizer step: 3311
Optimizer step: 3319
Optimizer step: 3327
Optimizer step: 3335
Optimizer step: 3343
Optimizer step: 3351
Optimizer step: 3359
Optimizer step: 3367
Optimizer step: 3375
Optimizer step: 3383
Optimizer step: 3391
Optimizer step: 3399
Optimizer step: 3407
Optimizer step: 3415
Optimizer step: 3423
Optimizer step: 3431
Optimizer step: 3439
Optimizer step: 3447
Optimizer step: 3455
Optimizer step: 3463
Optimizer step: 3471
Optimizer step: 3479
Optimizer step: 3487
Optimizer step: 3495
Optimizer step: 3503
Optimizer step: 3511
Optimizer step: 3519
Optimizer step: 3527
Optimizer step: 3535
Optimizer step: 3543
Optimizer step: 3551
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 2
Optimizer step: 10
Optimizer step: 18
Optimizer step: 26
Optimizer step: 34
Optimizer step: 42
Optimizer step: 50
Optimizer step: 58
Optimizer step: 66
Optimizer step: 74
Optimizer step: 82
Optimizer step: 90
Optimizer step: 98
Optimizer step: 106
Optimizer step: 114
Optimizer step: 122
Optimizer step: 130
Optimizer step: 138
Optimizer step: 146
Optimizer step: 154
Optimizer step: 162
Optimizer step: 170
Optimizer step: 178
Optimizer step: 186
Optimizer step: 194
Optimizer step: 202
Optimizer step: 210
Optimizer step: 218
Optimizer step: 226
Optimizer step: 234
Optimizer step: 242
Optimizer step: 250
Optimizer step: 258
Optimizer step: 266
Optimizer step: 274
Optimizer step: 282
Optimizer step: 290
Optimizer step: 298
Optimizer step: 306
Optimizer step: 314
Optimizer step: 322
Optimizer step: 330
Optimizer step: 338
Optimizer step: 346
Optimizer step: 354
Optimizer step: 362
Optimizer step: 370
Optimizer step: 378
Optimizer step: 386
Optimizer step: 394
Optimizer step

Optimizer step: 3178
Optimizer step: 3186
Optimizer step: 3194
Optimizer step: 3202
Optimizer step: 3210
Optimizer step: 3218
Optimizer step: 3226
Optimizer step: 3234
Optimizer step: 3242
Optimizer step: 3250
Optimizer step: 3258
Optimizer step: 3266
Optimizer step: 3274
Optimizer step: 3282
Optimizer step: 3290
Optimizer step: 3298
Optimizer step: 3306
Optimizer step: 3314
Optimizer step: 3322
Optimizer step: 3330
Optimizer step: 3338
Optimizer step: 3346
Optimizer step: 3354
Optimizer step: 3362
Optimizer step: 3370
Optimizer step: 3378
Optimizer step: 3386
Optimizer step: 3394
Optimizer step: 3402
Optimizer step: 3410
Optimizer step: 3418
Optimizer step: 3426
Optimizer step: 3434
Optimizer step: 3442
Optimizer step: 3450
Optimizer step: 3458
Optimizer step: 3466
Optimizer step: 3474
Optimizer step: 3482
Optimizer step: 3490
Optimizer step: 3498
Optimizer step: 3506
Optimizer step: 3514
Optimizer step: 3522
Optimizer step: 3530
Optimizer step: 3538
Optimizer step: 3546
Optimizer ste

  return pd.read_csv(xopen(filepath_or_buffer, "rb", use_auth_token=use_auth_token), **kwargs)


Optimizer step: 5
Optimizer step: 13
Optimizer step: 21
Optimizer step: 29
Optimizer step: 37
Optimizer step: 45
Optimizer step: 53
Optimizer step: 61
Optimizer step: 69
Optimizer step: 77
Optimizer step: 85
Optimizer step: 93
Optimizer step: 101
Optimizer step: 109
Optimizer step: 117
Optimizer step: 125
Optimizer step: 133
Optimizer step: 141
Optimizer step: 149
Optimizer step: 157
Optimizer step: 165
Optimizer step: 173
Optimizer step: 181
Optimizer step: 189
Optimizer step: 197
Optimizer step: 205
Optimizer step: 213
Optimizer step: 221
Optimizer step: 229
Optimizer step: 237
Optimizer step: 245
Optimizer step: 253
Optimizer step: 261
Optimizer step: 269
Optimizer step: 277
Optimizer step: 285
Optimizer step: 293
Optimizer step: 301
Optimizer step: 309
Optimizer step: 317
Optimizer step: 325
Optimizer step: 333
Optimizer step: 341
Optimizer step: 349


In [34]:
if print_for_debug:
    print(trainer.state.best_model_checkpoint)

./models/apache_jit/codebert512_msg_base/checkpoint-7050


## Evaluate and Predict

In [35]:
testing=True
if testing:
    test_dataset = test_dataset.map(encode,load_from_cache_file=False, batch_size=batch_size,batched=True, remove_columns=columns_to_remove)  
    trainer.model = AutoModelForSequenceClassification.from_pretrained(trainer.state.best_model_checkpoint).to('cuda') #
    test_res = trainer.evaluate(eval_dataset=test_dataset.with_format("torch"))
    print(test_res)

Map:   0%|          | 0/7526 [00:00<?, ? examples/s]

{'eval_loss': 0.6573278903961182, 'eval_accuracy': 0.7102046239702365, 'eval_f1': 0.5420953180768423, 'eval_precision': 0.3894419306184012, 'eval_recall': 0.8915745856353591, 'eval_matthews_correlation': 0.4435147704359337, 'eval_auc': 0.7792851539562119, 'eval_runtime': 60.5454, 'eval_samples_per_second': 124.303, 'eval_steps_per_second': 15.542, 'epoch': 10.01}


In [36]:
wandb.finish()

0,1
eval/accuracy,▁▄▅▆▆▇▇▆█▇█▄
eval/auc,▁▄▅▆▆▇▇▆█▇██
eval/f1,▆▇▇████████▁
eval/loss,▆▅█▅▅▃▃▄▂▃▁▆
eval/matthews_correlation,▁▄▆▇▇█▇▇█▇█▄
eval/precision,▅▆▆▇▇▇▇▇█▇█▁
eval/recall,▁▄█▇█▆▇▇▄▆▂▃
eval/runtime,▁▂▁▁▁▁▁▁▁▂▂█
eval/samples_per_second,▂▁▂▂▂▂▂▂▂▁▁█
eval/steps_per_second,▂▁▂▂▂▂▂▂▂▁▁█

0,1
eval/accuracy,0.7102
eval/auc,0.77929
eval/f1,0.5421
eval/loss,0.65733
eval/matthews_correlation,0.44351
eval/precision,0.38944
eval/recall,0.89157
eval/runtime,60.5454
eval/samples_per_second,124.303
eval/steps_per_second,15.542
