In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

In [2]:
if torch.cuda.is_available():
    print("CUDA is available")
else:
    print("CUDA is not available")

CUDA is available


In [3]:
import pandas as pd
import numpy as np

In [4]:
malware_calls = pd.read_csv("../datasets/CatakPreprocessed.csv")

In [5]:
malware_calls.iloc[0]['class']

'Trojan'

In [6]:
malware_calls.head()

Unnamed: 0,api,class
0,ldrloaddll ldrgetprocedureaddress regopenkeyex...,Trojan
1,getsystemtimeasfiletime ntallocatevirtualmemor...,Trojan
2,ldrgetdllhandle ldrgetprocedureaddress getsyst...,Backdoor
3,ldrloaddll ldrgetprocedureaddress regopenkeyex...,Backdoor
4,ldrloaddll ldrgetprocedureaddress wsastartup n...,Trojan


In [7]:
malware_calls['class'].value_counts()

Trojan        1001
Backdoor      1001
Downloader    1001
Worms         1001
Virus         1001
Dropper        891
Spyware        832
Adware         379
Name: class, dtype: int64

In [8]:
num_classes = len(malware_calls["class"].value_counts())
class_weights = (1 - (malware_calls['class'].value_counts().sort_index() / len(malware_calls))).values
class_weights = torch.from_numpy(class_weights).float().to("cuda")
class_weights

tensor([0.9467, 0.8592, 0.8592, 0.8746, 0.8829, 0.8592, 0.8592, 0.8592],
       device='cuda:0')

### Class Mapping

In [9]:
CAT2IDX = {
    'Virus': 0,
    'Trojan': 1,
    'Worms': 2,
    'Downloader': 3,
    'Backdoor': 4,
    'Dropper': 5,
    'Spyware': 6,
    'Adware': 7,
}

IDX2CAT = {
    0:'Virus',
    1:'Trojan',
    2:'Worms',
    3:'Downloader',
    4:'Backdoor',
    5:'Dropper',
    6:'Spyware',
    7:'Adware',
}

from huggingface_hub import login
login()

## Load DistilBERT Model Checkpoint from Hugging Face

In [10]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_name = "distilbert-base-uncased"

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_classes, label2id=CAT2IDX, id2label=IDX2CAT)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training
tokenizer.model_max_length = 512

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Split Data for Training and Validation

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(malware_calls.api, malware_calls['class'],
test_size=0.2, random_state=75, stratify = malware_calls['class'])

In [11]:
from datasets import load_dataset, Dataset
import datasets

train = Dataset.from_pandas(pd.concat([Y_train, X_train], axis=1)).remove_columns('__index_level_0__')
validation = Dataset.from_pandas(pd.concat([Y_test, X_test], axis=1)).remove_columns('__index_level_0__')

dataset = datasets.DatasetDict({"train": train, "validation": validation})
dataset["train"] = dataset["train"].rename_column("class", "label")
dataset["validation"] = dataset["validation"].rename_column("class", "label")

# Rename 'api' to 'text' in both train and validation datasets
dataset["train"] = dataset["train"].rename_column("api", "text")
dataset["validation"] = dataset["validation"].rename_column("api", "text")
dataset

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 5685
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1422
    })
})

In [12]:
def convert_label(example):
    # Convert the label using the CAT2IDX mapping
    example['label'] = CAT2IDX[example['label']]
    return example

# Apply the function to both the train and validation datasets
dataset['train'] = dataset['train'].map(convert_label)
dataset['validation'] = dataset['validation'].map(convert_label)

Map:   0%|          | 0/5685 [00:00<?, ? examples/s]

Map:   0%|          | 0/1422 [00:00<?, ? examples/s]

## Load or Create Tokenized Dataset

In [13]:
from datasets import load_from_disk
import os
from transformers import AutoTokenizer

# Assuming you have already defined your tokenizer
# If not, initialize it like this (replace 'your-model-name' with the model you are using):
# tokenizer = AutoTokenizer.from_pretrained('your-model-name')

def tokenize_function(examples):
    # Extract text
    text = examples['text']
    
    # Set truncation side (optional, based on your preference)
    tokenizer.truncation_side = "left"

    # Tokenize with truncation and padding
    tokenized_inputs = tokenizer(
        text,
        padding="max_length",  # Add padding to the max_length
        truncation=True,
        max_length=512,
        return_tensors='np'   # Return numpy tensors
    )
    
    return tokenized_inputs

# Rest of your code for loading/saving dataset
directory_path = 'tokenized_datasets'
file_name = 'catak_tokenized'
full_path = os.path.join(directory_path, file_name)

if os.path.exists(full_path):
    # Load the existing dataset
    tokenized_dataset = load_from_disk(full_path)
    print("Loaded the tokenized dataset.")
else:
    # Ensure the directory exists
    os.makedirs(directory_path, exist_ok=True)
    
    # Add pad token if it's not already present
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
        # Resize model embeddings to account for new tokens
        model.resize_token_embeddings(len(tokenizer))

    # Apply tokenization function to the dataset
    tokenized_dataset = dataset.map(tokenize_function, batched=True)

    # Save the new tokenized dataset
    tokenized_dataset.save_to_disk(full_path)
    print("Saved new tokenized dataset.")

# Print or return the tokenized dataset
print(tokenized_dataset)

Loaded the tokenized dataset.
DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 5685
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1422
    })
})


In [14]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

NameError: name 'tokenizer' is not defined

In [None]:
import numpy as np
from datasets import load_metric
from sklearn.metrics import roc_auc_score
from scipy.special import softmax

def compute_metrics(eval_pred):
    precision = load_metric("precision")
    recall = load_metric("recall")
    f1 = load_metric("f1")
    acc = load_metric("accuracy")
    mcc = load_metric("matthews_correlation")
    #auc = load_metric("auc")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = precision.compute(predictions=predictions, average = "macro", references=labels)["precision"]
    recall = recall.compute(predictions=predictions, average = "macro", references=labels)["recall"]
    f1 = f1.compute(predictions=predictions, average = "macro", references=labels)["f1"]
    acc = acc.compute(predictions=predictions, references=labels)["accuracy"]
    mcc = mcc.compute(predictions=predictions, references=labels)["matthews_correlation"]
    auc = roc_auc_score(labels, softmax(logits, axis=1), multi_class='ovo', average='macro')
    return {"precision": precision, "recall": recall, "acc": acc, "mcc": mcc, "f1": f1, "auc":auc}

In [17]:
import torch
torch.cuda.empty_cache()
# Check if CUDA is available
if torch.cuda.is_available():
    # Get the number of GPUs available
    n_gpu = torch.cuda.device_count()
    for i in range(n_gpu):
        print(f"GPU {i}:")
        print(f"  Total memory: {torch.cuda.get_device_properties(i).total_memory / 1e9} GB")
        print(f"  Allocated memory: {torch.cuda.memory_allocated(i) / 1e9} GB")
        print(f"  Cached memory: {torch.cuda.memory_reserved(i) / 1e9} GB")
else:
    print("CUDA is not available.")

GPU 0:
  Total memory: 8.58947584 GB
  Allocated memory: 5.12e-07 GB
  Cached memory: 0.002097152 GB


### Prediction Using Base Model Performance is not Ideal

In [18]:
input = tokenizer.encode(malware_calls.iloc[0]['api'], return_tensors="pt")
input = input.to(torch.device("cuda"))
logits = model(input).logits
prediction = torch.argmax(logits)
print(malware_calls.iloc[0]['class'] + " - " + IDX2CAT[prediction.tolist()])

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper_CUDA__index_select)

In [20]:
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig

peft_config = LoraConfig(
    task_type="SEQ_CLS",
    r=4,
    lora_alpha=32,
    lora_dropout=0.01,
    target_modules=['q_lin', 'v_lin'] # Only apply LORA to the query and value projections, the paper on LORA suggests that this provides the best results
)

In [21]:
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

### PEFT Will Only Tune %0.06 of the Parameters

In [22]:
peft_model = get_peft_model(model, peft_config)
peft_model.print_trainable_parameters()

trainable params: 633,608 || all params: 67,593,232 || trainable%: 0.9373837901404093


In [31]:
# hyperparameters
lr = 1e-3
batch_size = 4
num_epochs = 100

In [32]:
from transformers import TrainingArguments

# define training arguments
training_args = TrainingArguments(
    output_dir= model_name + "-Malware-Classification",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.1,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [16]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "distilbert-base-uncased"
model_path = model_name + "-Malware-Classification"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

OSError: distilbert-base-uncased-Malware-Classification does not appear to have a file named config.json. Checkout 'https://huggingface.co/distilbert-base-uncased-Malware-Classification/None' for available files.

In [33]:
from transformers import Trainer

try:
    # creater trainer object
    trainer = Trainer(
        model=peft_model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
        eval_dataset=tokenized_dataset["validation"],
        tokenizer=tokenizer,
        data_collator=data_collator, # this will dynamically pad examples in each batch to be equal length
        compute_metrics=compute_metrics,
    )
    
    trainer.train()
    
    
except RuntimeError as e:
    print(f"An error occurred: {e}")
    torch.cuda.empty_cache()  # Clearing the CUDA cache

  0%|          | 0/142200 [00:00<?, ?it/s]

{'loss': 1.9052, 'learning_rate': 0.0009964838255977497, 'epoch': 0.35}
{'loss': 1.9164, 'learning_rate': 0.0009929676511954994, 'epoch': 0.7}


  0%|          | 0/356 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.8606908321380615, 'eval_precision': 0.2926126900872718, 'eval_recall': 0.2799062530340609, 'eval_acc': 0.2890295358649789, 'eval_mcc': 0.19364218153333052, 'eval_f1': 0.2572794935217092, 'eval_auc': 0.6890314173086913, 'eval_runtime': 29.2505, 'eval_samples_per_second': 48.615, 'eval_steps_per_second': 12.171, 'epoch': 1.0}
{'loss': 1.9003, 'learning_rate': 0.000989451476793249, 'epoch': 1.05}
{'loss': 1.9173, 'learning_rate': 0.0009859353023909986, 'epoch': 1.41}
{'loss': 1.9064, 'learning_rate': 0.0009824191279887483, 'epoch': 1.76}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.9111125469207764, 'eval_precision': 0.3239115192468075, 'eval_recall': 0.24918462958457782, 'eval_acc': 0.26933895921237694, 'eval_mcc': 0.15779335300104816, 'eval_f1': 0.23869373725104867, 'eval_auc': 0.6770416274918887, 'eval_runtime': 30.7853, 'eval_samples_per_second': 46.191, 'eval_steps_per_second': 11.564, 'epoch': 2.0}
{'loss': 1.9285, 'learning_rate': 0.000978902953586498, 'epoch': 2.11}
{'loss': 1.903, 'learning_rate': 0.0009753867791842476, 'epoch': 2.46}
{'loss': 1.9078, 'learning_rate': 0.0009718706047819972, 'epoch': 2.81}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.85440993309021, 'eval_precision': 0.35388794516640876, 'eval_recall': 0.2661401452150995, 'eval_acc': 0.28551336146272854, 'eval_mcc': 0.18282556759997007, 'eval_f1': 0.2366588991505535, 'eval_auc': 0.697366254404388, 'eval_runtime': 28.6965, 'eval_samples_per_second': 49.553, 'eval_steps_per_second': 12.406, 'epoch': 3.0}
{'loss': 1.8588, 'learning_rate': 0.0009683544303797469, 'epoch': 3.16}
{'loss': 1.897, 'learning_rate': 0.0009648382559774966, 'epoch': 3.52}
{'loss': 1.9046, 'learning_rate': 0.0009613220815752461, 'epoch': 3.87}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.842516541481018, 'eval_precision': 0.33406195537367267, 'eval_recall': 0.28141613063051896, 'eval_acc': 0.2981715893108298, 'eval_mcc': 0.19035107247569602, 'eval_f1': 0.2597922122986338, 'eval_auc': 0.6874524289846161, 'eval_runtime': 27.7313, 'eval_samples_per_second': 51.278, 'eval_steps_per_second': 12.837, 'epoch': 4.0}
{'loss': 1.8975, 'learning_rate': 0.0009578059071729957, 'epoch': 4.22}
{'loss': 1.8768, 'learning_rate': 0.0009542897327707455, 'epoch': 4.57}
{'loss': 1.8579, 'learning_rate': 0.0009507735583684951, 'epoch': 4.92}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.851553201675415, 'eval_precision': 0.4091692767844264, 'eval_recall': 0.2838200617243496, 'eval_acc': 0.3059071729957806, 'eval_mcc': 0.20786927493130597, 'eval_f1': 0.27302997555401765, 'eval_auc': 0.6861149218114299, 'eval_runtime': 26.2631, 'eval_samples_per_second': 54.144, 'eval_steps_per_second': 13.555, 'epoch': 5.0}
{'loss': 1.8548, 'learning_rate': 0.0009472573839662447, 'epoch': 5.27}
{'loss': 1.8398, 'learning_rate': 0.0009437412095639943, 'epoch': 5.63}
{'loss': 1.8506, 'learning_rate': 0.0009402250351617441, 'epoch': 5.98}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7573602199554443, 'eval_precision': 0.39853295105863196, 'eval_recall': 0.306799485906868, 'eval_acc': 0.31856540084388185, 'eval_mcc': 0.22299801851726478, 'eval_f1': 0.3007993437685861, 'eval_auc': 0.7297609941366415, 'eval_runtime': 23.7188, 'eval_samples_per_second': 59.953, 'eval_steps_per_second': 15.009, 'epoch': 6.0}
{'loss': 1.8282, 'learning_rate': 0.0009367088607594937, 'epoch': 6.33}
{'loss': 1.8251, 'learning_rate': 0.0009331926863572433, 'epoch': 6.68}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7880284786224365, 'eval_precision': 0.367443544141412, 'eval_recall': 0.3140400071482057, 'eval_acc': 0.31926863572433195, 'eval_mcc': 0.21929436774676334, 'eval_f1': 0.3074507631590697, 'eval_auc': 0.7227879138205922, 'eval_runtime': 21.7076, 'eval_samples_per_second': 65.507, 'eval_steps_per_second': 16.4, 'epoch': 7.0}
{'loss': 1.8226, 'learning_rate': 0.000929676511954993, 'epoch': 7.03}
{'loss': 1.8146, 'learning_rate': 0.0009261603375527427, 'epoch': 7.38}
{'loss': 1.8277, 'learning_rate': 0.0009226441631504923, 'epoch': 7.74}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7576265335083008, 'eval_precision': 0.35684376958958974, 'eval_recall': 0.3247164406524162, 'eval_acc': 0.34458509142053445, 'eval_mcc': 0.24609612226741337, 'eval_f1': 0.3245260274325822, 'eval_auc': 0.729594086904808, 'eval_runtime': 23.2481, 'eval_samples_per_second': 61.166, 'eval_steps_per_second': 15.313, 'epoch': 8.0}
{'loss': 1.7819, 'learning_rate': 0.0009191279887482419, 'epoch': 8.09}
{'loss': 1.748, 'learning_rate': 0.0009156118143459916, 'epoch': 8.44}
{'loss': 1.791, 'learning_rate': 0.0009120956399437413, 'epoch': 8.79}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7995179891586304, 'eval_precision': 0.41705899986238204, 'eval_recall': 0.32896846306680533, 'eval_acc': 0.34177215189873417, 'eval_mcc': 0.24962954512871158, 'eval_f1': 0.3040070944221117, 'eval_auc': 0.7291483236749476, 'eval_runtime': 25.1115, 'eval_samples_per_second': 56.627, 'eval_steps_per_second': 14.177, 'epoch': 9.0}
{'loss': 1.7792, 'learning_rate': 0.0009085794655414908, 'epoch': 9.14}
{'loss': 1.7351, 'learning_rate': 0.0009050632911392405, 'epoch': 9.49}
{'loss': 1.7735, 'learning_rate': 0.0009015471167369902, 'epoch': 9.85}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.724930763244629, 'eval_precision': 0.43261307776512614, 'eval_recall': 0.3464859361746423, 'eval_acc': 0.35232067510548526, 'eval_mcc': 0.2540916077176538, 'eval_f1': 0.36655901510461875, 'eval_auc': 0.735820387001013, 'eval_runtime': 18.8593, 'eval_samples_per_second': 75.401, 'eval_steps_per_second': 18.877, 'epoch': 10.0}
{'loss': 1.7325, 'learning_rate': 0.0008980309423347398, 'epoch': 10.2}
{'loss': 1.7427, 'learning_rate': 0.0008945147679324894, 'epoch': 10.55}
{'loss': 1.7217, 'learning_rate': 0.0008909985935302391, 'epoch': 10.9}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7353789806365967, 'eval_precision': 0.44606288671319494, 'eval_recall': 0.3240000555853614, 'eval_acc': 0.329817158931083, 'eval_mcc': 0.2459049338045217, 'eval_f1': 0.3219333969835296, 'eval_auc': 0.7458258928244481, 'eval_runtime': 19.0415, 'eval_samples_per_second': 74.679, 'eval_steps_per_second': 18.696, 'epoch': 11.0}
{'loss': 1.7523, 'learning_rate': 0.0008874824191279888, 'epoch': 11.25}
{'loss': 1.6858, 'learning_rate': 0.0008839662447257384, 'epoch': 11.6}
{'loss': 1.7309, 'learning_rate': 0.0008804500703234881, 'epoch': 11.95}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.700883150100708, 'eval_precision': 0.3984181601771414, 'eval_recall': 0.38173560393313477, 'eval_acc': 0.3755274261603376, 'eval_mcc': 0.28824380159492147, 'eval_f1': 0.37425527816106596, 'eval_auc': 0.7515699671119057, 'eval_runtime': 18.0949, 'eval_samples_per_second': 78.586, 'eval_steps_per_second': 19.674, 'epoch': 12.0}
{'loss': 1.7013, 'learning_rate': 0.0008769338959212377, 'epoch': 12.31}
{'loss': 1.7358, 'learning_rate': 0.0008734177215189874, 'epoch': 12.66}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7137092351913452, 'eval_precision': 0.3450442969610873, 'eval_recall': 0.3598307784481347, 'eval_acc': 0.35372714486638535, 'eval_mcc': 0.25844050168412885, 'eval_f1': 0.340584852221793, 'eval_auc': 0.7476329809130675, 'eval_runtime': 19.118, 'eval_samples_per_second': 74.38, 'eval_steps_per_second': 18.621, 'epoch': 13.0}
{'loss': 1.7348, 'learning_rate': 0.000869901547116737, 'epoch': 13.01}
{'loss': 1.671, 'learning_rate': 0.0008663853727144867, 'epoch': 13.36}
{'loss': 1.6821, 'learning_rate': 0.0008628691983122364, 'epoch': 13.71}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7499446868896484, 'eval_precision': 0.464893537799182, 'eval_recall': 0.33047311238886606, 'eval_acc': 0.3431786216596343, 'eval_mcc': 0.2604762903491822, 'eval_f1': 0.338496135197074, 'eval_auc': 0.7447531542776301, 'eval_runtime': 18.8521, 'eval_samples_per_second': 75.429, 'eval_steps_per_second': 18.884, 'epoch': 14.0}
{'loss': 1.7219, 'learning_rate': 0.000859353023909986, 'epoch': 14.06}
{'loss': 1.6648, 'learning_rate': 0.0008558368495077355, 'epoch': 14.42}
{'loss': 1.7015, 'learning_rate': 0.0008523206751054852, 'epoch': 14.77}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.7182934284210205, 'eval_precision': 0.42988392529011726, 'eval_recall': 0.3688683880000311, 'eval_acc': 0.37341772151898733, 'eval_mcc': 0.27975799951890984, 'eval_f1': 0.38350763183408054, 'eval_auc': 0.7461926151057419, 'eval_runtime': 18.8095, 'eval_samples_per_second': 75.6, 'eval_steps_per_second': 18.927, 'epoch': 15.0}
{'loss': 1.6972, 'learning_rate': 0.0008488045007032349, 'epoch': 15.12}
{'loss': 1.7417, 'learning_rate': 0.0008452883263009845, 'epoch': 15.47}
{'loss': 1.6801, 'learning_rate': 0.0008417721518987342, 'epoch': 15.82}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.6921335458755493, 'eval_precision': 0.37622911077702526, 'eval_recall': 0.38802261096490653, 'eval_acc': 0.3790436005625879, 'eval_mcc': 0.28820069830419004, 'eval_f1': 0.3759484535057476, 'eval_auc': 0.7649937366961578, 'eval_runtime': 18.1519, 'eval_samples_per_second': 78.339, 'eval_steps_per_second': 19.612, 'epoch': 16.0}
{'loss': 1.6508, 'learning_rate': 0.0008382559774964838, 'epoch': 16.17}
{'loss': 1.6697, 'learning_rate': 0.0008347398030942335, 'epoch': 16.53}
{'loss': 1.6792, 'learning_rate': 0.0008312236286919831, 'epoch': 16.88}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.6135276556015015, 'eval_precision': 0.477233403692092, 'eval_recall': 0.404447029597213, 'eval_acc': 0.3980309423347398, 'eval_mcc': 0.3173029417909421, 'eval_f1': 0.40490944172850646, 'eval_auc': 0.7834281219529935, 'eval_runtime': 18.8708, 'eval_samples_per_second': 75.354, 'eval_steps_per_second': 18.865, 'epoch': 17.0}
{'loss': 1.6395, 'learning_rate': 0.0008277074542897328, 'epoch': 17.23}
{'loss': 1.6219, 'learning_rate': 0.0008241912798874825, 'epoch': 17.58}
{'loss': 1.6208, 'learning_rate': 0.0008206751054852321, 'epoch': 17.93}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.607864260673523, 'eval_precision': 0.49589452111663146, 'eval_recall': 0.4119725489068819, 'eval_acc': 0.40717299578059074, 'eval_mcc': 0.32146665619846654, 'eval_f1': 0.4307029150131306, 'eval_auc': 0.7826876849101219, 'eval_runtime': 17.8983, 'eval_samples_per_second': 79.449, 'eval_steps_per_second': 19.89, 'epoch': 18.0}
{'loss': 1.5914, 'learning_rate': 0.0008171589310829818, 'epoch': 18.28}
{'loss': 1.638, 'learning_rate': 0.0008136427566807314, 'epoch': 18.64}
{'loss': 1.5871, 'learning_rate': 0.0008101265822784811, 'epoch': 18.99}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.6023666858673096, 'eval_precision': 0.46622147678306225, 'eval_recall': 0.4370162265123495, 'eval_acc': 0.4247538677918425, 'eval_mcc': 0.34030642095309116, 'eval_f1': 0.4411669260794741, 'eval_auc': 0.7889857432566428, 'eval_runtime': 18.7438, 'eval_samples_per_second': 75.865, 'eval_steps_per_second': 18.993, 'epoch': 19.0}
{'loss': 1.6215, 'learning_rate': 0.0008066104078762307, 'epoch': 19.34}
{'loss': 1.5649, 'learning_rate': 0.0008030942334739803, 'epoch': 19.69}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.563122034072876, 'eval_precision': 0.44803578144167416, 'eval_recall': 0.4437904169947262, 'eval_acc': 0.43248945147679324, 'eval_mcc': 0.34724262232903197, 'eval_f1': 0.4429186968457577, 'eval_auc': 0.8000929724893717, 'eval_runtime': 18.0358, 'eval_samples_per_second': 78.843, 'eval_steps_per_second': 19.738, 'epoch': 20.0}
{'loss': 1.5874, 'learning_rate': 0.0007995780590717299, 'epoch': 20.04}
{'loss': 1.5533, 'learning_rate': 0.0007960618846694796, 'epoch': 20.39}
{'loss': 1.5737, 'learning_rate': 0.0007925457102672293, 'epoch': 20.75}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.604105830192566, 'eval_precision': 0.48234974563147204, 'eval_recall': 0.4362047662092164, 'eval_acc': 0.42756680731364277, 'eval_mcc': 0.34861583056209083, 'eval_f1': 0.4317796177299705, 'eval_auc': 0.7900201776771729, 'eval_runtime': 19.1359, 'eval_samples_per_second': 74.311, 'eval_steps_per_second': 18.604, 'epoch': 21.0}
{'loss': 1.5757, 'learning_rate': 0.0007890295358649789, 'epoch': 21.1}
{'loss': 1.5682, 'learning_rate': 0.0007855133614627285, 'epoch': 21.45}
{'loss': 1.5679, 'learning_rate': 0.0007819971870604782, 'epoch': 21.8}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5735143423080444, 'eval_precision': 0.48425042510000826, 'eval_recall': 0.43522674306364895, 'eval_acc': 0.4282700421940928, 'eval_mcc': 0.34397129594089015, 'eval_f1': 0.44809100757047926, 'eval_auc': 0.8007823872357712, 'eval_runtime': 18.5009, 'eval_samples_per_second': 76.861, 'eval_steps_per_second': 19.242, 'epoch': 22.0}
{'loss': 1.5361, 'learning_rate': 0.0007784810126582279, 'epoch': 22.15}
{'loss': 1.5647, 'learning_rate': 0.0007749648382559775, 'epoch': 22.5}
{'loss': 1.5592, 'learning_rate': 0.0007714486638537272, 'epoch': 22.86}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.562341570854187, 'eval_precision': 0.4820525460841153, 'eval_recall': 0.45503528450438235, 'eval_acc': 0.44585091420534456, 'eval_mcc': 0.3632074863975369, 'eval_f1': 0.45967284743989056, 'eval_auc': 0.8036860364534111, 'eval_runtime': 19.2655, 'eval_samples_per_second': 73.811, 'eval_steps_per_second': 18.479, 'epoch': 23.0}
{'loss': 1.5196, 'learning_rate': 0.0007679324894514768, 'epoch': 23.21}
{'loss': 1.5191, 'learning_rate': 0.0007644163150492265, 'epoch': 23.56}
{'loss': 1.5445, 'learning_rate': 0.0007609001406469761, 'epoch': 23.91}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5557928085327148, 'eval_precision': 0.492976086486716, 'eval_recall': 0.4557547903874502, 'eval_acc': 0.44585091420534456, 'eval_mcc': 0.3657416093326619, 'eval_f1': 0.45955038963125466, 'eval_auc': 0.8074702107162615, 'eval_runtime': 18.78, 'eval_samples_per_second': 75.719, 'eval_steps_per_second': 18.956, 'epoch': 24.0}
{'loss': 1.5362, 'learning_rate': 0.0007573839662447258, 'epoch': 24.26}
{'loss': 1.4801, 'learning_rate': 0.0007538677918424755, 'epoch': 24.61}
{'loss': 1.5433, 'learning_rate': 0.000750351617440225, 'epoch': 24.96}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5760431289672852, 'eval_precision': 0.4801966051667784, 'eval_recall': 0.42426482137272714, 'eval_acc': 0.4191279887482419, 'eval_mcc': 0.33272533773470264, 'eval_f1': 0.4334180042198036, 'eval_auc': 0.8046804573411991, 'eval_runtime': 18.9826, 'eval_samples_per_second': 74.911, 'eval_steps_per_second': 18.754, 'epoch': 25.0}
{'loss': 1.5011, 'learning_rate': 0.0007468354430379746, 'epoch': 25.32}
{'loss': 1.4991, 'learning_rate': 0.0007433192686357243, 'epoch': 25.67}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.482191801071167, 'eval_precision': 0.49635028467352976, 'eval_recall': 0.47326306710303767, 'eval_acc': 0.46272855133614627, 'eval_mcc': 0.38272897753068447, 'eval_f1': 0.47625701768674256, 'eval_auc': 0.8164873523541659, 'eval_runtime': 18.0509, 'eval_samples_per_second': 78.777, 'eval_steps_per_second': 19.722, 'epoch': 26.0}
{'loss': 1.5275, 'learning_rate': 0.000739803094233474, 'epoch': 26.02}
{'loss': 1.4767, 'learning_rate': 0.0007362869198312236, 'epoch': 26.37}
{'loss': 1.4991, 'learning_rate': 0.0007327707454289733, 'epoch': 26.72}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.517126202583313, 'eval_precision': 0.502142413231254, 'eval_recall': 0.48038990300113293, 'eval_acc': 0.4641350210970464, 'eval_mcc': 0.38456161895657803, 'eval_f1': 0.4863847806668811, 'eval_auc': 0.8097210258157325, 'eval_runtime': 19.0045, 'eval_samples_per_second': 74.825, 'eval_steps_per_second': 18.732, 'epoch': 27.0}
{'loss': 1.5237, 'learning_rate': 0.000729254571026723, 'epoch': 27.07}
{'loss': 1.4805, 'learning_rate': 0.0007257383966244726, 'epoch': 27.43}
{'loss': 1.5079, 'learning_rate': 0.0007222222222222222, 'epoch': 27.78}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5174651145935059, 'eval_precision': 0.5126821313555899, 'eval_recall': 0.46628561327965, 'eval_acc': 0.45850914205344584, 'eval_mcc': 0.37973153983142877, 'eval_f1': 0.4763813962651261, 'eval_auc': 0.8140604974143949, 'eval_runtime': 18.0144, 'eval_samples_per_second': 78.937, 'eval_steps_per_second': 19.762, 'epoch': 28.0}
{'loss': 1.4567, 'learning_rate': 0.0007187060478199719, 'epoch': 28.13}
{'loss': 1.4864, 'learning_rate': 0.0007151898734177216, 'epoch': 28.48}
{'loss': 1.4895, 'learning_rate': 0.0007116736990154712, 'epoch': 28.83}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5271145105361938, 'eval_precision': 0.5047187097859518, 'eval_recall': 0.4555102011107048, 'eval_acc': 0.44585091420534456, 'eval_mcc': 0.3635272979416344, 'eval_f1': 0.46810281021535416, 'eval_auc': 0.8178665561780785, 'eval_runtime': 19.6663, 'eval_samples_per_second': 72.306, 'eval_steps_per_second': 18.102, 'epoch': 29.0}
{'loss': 1.4582, 'learning_rate': 0.0007081575246132208, 'epoch': 29.18}
{'loss': 1.4719, 'learning_rate': 0.0007046413502109706, 'epoch': 29.54}
{'loss': 1.4263, 'learning_rate': 0.0007011251758087202, 'epoch': 29.89}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4857852458953857, 'eval_precision': 0.4975283972804319, 'eval_recall': 0.4643025612477526, 'eval_acc': 0.4507735583684951, 'eval_mcc': 0.3724332087523173, 'eval_f1': 0.4716939625886035, 'eval_auc': 0.8243280431824972, 'eval_runtime': 18.5884, 'eval_samples_per_second': 76.5, 'eval_steps_per_second': 19.152, 'epoch': 30.0}
{'loss': 1.4061, 'learning_rate': 0.0006976090014064697, 'epoch': 30.24}
{'loss': 1.4485, 'learning_rate': 0.0006940928270042193, 'epoch': 30.59}
{'loss': 1.4335, 'learning_rate': 0.0006905766526019691, 'epoch': 30.94}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5035771131515503, 'eval_precision': 0.47853348319301314, 'eval_recall': 0.47860237657953286, 'eval_acc': 0.4648382559774965, 'eval_mcc': 0.3878075648655349, 'eval_f1': 0.4703290124495931, 'eval_auc': 0.8196592903841408, 'eval_runtime': 19.4809, 'eval_samples_per_second': 72.994, 'eval_steps_per_second': 18.274, 'epoch': 31.0}
{'loss': 1.4441, 'learning_rate': 0.0006870604781997187, 'epoch': 31.29}
{'loss': 1.4407, 'learning_rate': 0.0006835443037974683, 'epoch': 31.65}
{'loss': 1.4491, 'learning_rate': 0.000680028129395218, 'epoch': 32.0}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4612845182418823, 'eval_precision': 0.5228708958878858, 'eval_recall': 0.49235033529681943, 'eval_acc': 0.48241912798874825, 'eval_mcc': 0.40575677843783, 'eval_f1': 0.49838445745682625, 'eval_auc': 0.8291931041658539, 'eval_runtime': 18.8082, 'eval_samples_per_second': 75.605, 'eval_steps_per_second': 18.928, 'epoch': 32.0}
{'loss': 1.4151, 'learning_rate': 0.0006765119549929677, 'epoch': 32.35}
{'loss': 1.431, 'learning_rate': 0.0006729957805907173, 'epoch': 32.7}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4992343187332153, 'eval_precision': 0.47047083802629364, 'eval_recall': 0.4712412488015486, 'eval_acc': 0.45921237693389594, 'eval_mcc': 0.382080275816448, 'eval_f1': 0.45819555036199006, 'eval_auc': 0.8266963446866321, 'eval_runtime': 18.3269, 'eval_samples_per_second': 77.591, 'eval_steps_per_second': 19.425, 'epoch': 33.0}
{'loss': 1.4396, 'learning_rate': 0.0006694796061884669, 'epoch': 33.05}
{'loss': 1.4046, 'learning_rate': 0.0006659634317862167, 'epoch': 33.4}
{'loss': 1.4317, 'learning_rate': 0.0006624472573839663, 'epoch': 33.76}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.451066493988037, 'eval_precision': 0.507567005170191, 'eval_recall': 0.4887411909060948, 'eval_acc': 0.4732770745428973, 'eval_mcc': 0.39643308520482873, 'eval_f1': 0.4932767055998501, 'eval_auc': 0.8298621927545001, 'eval_runtime': 17.1144, 'eval_samples_per_second': 83.088, 'eval_steps_per_second': 20.801, 'epoch': 34.0}
{'loss': 1.3767, 'learning_rate': 0.0006589310829817159, 'epoch': 34.11}
{'loss': 1.3976, 'learning_rate': 0.0006554149085794655, 'epoch': 34.46}
{'loss': 1.3952, 'learning_rate': 0.0006518987341772153, 'epoch': 34.81}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4682493209838867, 'eval_precision': 0.5066756802453688, 'eval_recall': 0.48323693403329004, 'eval_acc': 0.4711673699015471, 'eval_mcc': 0.3928881172941841, 'eval_f1': 0.4873179369518465, 'eval_auc': 0.8294253282991002, 'eval_runtime': 17.2081, 'eval_samples_per_second': 82.636, 'eval_steps_per_second': 20.688, 'epoch': 35.0}
{'loss': 1.4207, 'learning_rate': 0.0006483825597749649, 'epoch': 35.16}
{'loss': 1.3655, 'learning_rate': 0.0006448663853727144, 'epoch': 35.51}
{'loss': 1.4024, 'learning_rate': 0.0006413502109704642, 'epoch': 35.86}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4663234949111938, 'eval_precision': 0.5227155293689909, 'eval_recall': 0.49669985600085914, 'eval_acc': 0.4831223628691983, 'eval_mcc': 0.4067938785814625, 'eval_f1': 0.503786409537479, 'eval_auc': 0.8314634754408882, 'eval_runtime': 17.3594, 'eval_samples_per_second': 81.915, 'eval_steps_per_second': 20.508, 'epoch': 36.0}
{'loss': 1.3797, 'learning_rate': 0.0006378340365682138, 'epoch': 36.22}
{'loss': 1.3493, 'learning_rate': 0.0006343178621659634, 'epoch': 36.57}
{'loss': 1.4186, 'learning_rate': 0.000630801687763713, 'epoch': 36.92}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4580519199371338, 'eval_precision': 0.5260460777997951, 'eval_recall': 0.512800002526347, 'eval_acc': 0.5056258790436006, 'eval_mcc': 0.43167538652414106, 'eval_f1': 0.5111740852901955, 'eval_auc': 0.838267075026956, 'eval_runtime': 17.1464, 'eval_samples_per_second': 82.933, 'eval_steps_per_second': 20.762, 'epoch': 37.0}
{'loss': 1.3557, 'learning_rate': 0.0006272855133614628, 'epoch': 37.27}
{'loss': 1.3817, 'learning_rate': 0.0006237693389592124, 'epoch': 37.62}
{'loss': 1.3657, 'learning_rate': 0.000620253164556962, 'epoch': 37.97}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4923348426818848, 'eval_precision': 0.511083272783613, 'eval_recall': 0.4889767389125295, 'eval_acc': 0.48171589310829815, 'eval_mcc': 0.40528164443858905, 'eval_f1': 0.4893206055025204, 'eval_auc': 0.8285167081135968, 'eval_runtime': 19.4976, 'eval_samples_per_second': 72.932, 'eval_steps_per_second': 18.259, 'epoch': 38.0}
{'loss': 1.3661, 'learning_rate': 0.0006167369901547117, 'epoch': 38.33}
{'loss': 1.3368, 'learning_rate': 0.0006132208157524614, 'epoch': 38.68}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4363280534744263, 'eval_precision': 0.539657840366203, 'eval_recall': 0.5215044330784665, 'eval_acc': 0.5119549929676512, 'eval_mcc': 0.4395204894954249, 'eval_f1': 0.5246435534659364, 'eval_auc': 0.8385624766608775, 'eval_runtime': 18.596, 'eval_samples_per_second': 76.468, 'eval_steps_per_second': 19.144, 'epoch': 39.0}
{'loss': 1.3546, 'learning_rate': 0.000609704641350211, 'epoch': 39.03}
{'loss': 1.3304, 'learning_rate': 0.0006061884669479606, 'epoch': 39.38}
{'loss': 1.3567, 'learning_rate': 0.0006026722925457104, 'epoch': 39.73}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4743773937225342, 'eval_precision': 0.519629044848923, 'eval_recall': 0.5094298985438217, 'eval_acc': 0.49929676511954996, 'eval_mcc': 0.4265109316757718, 'eval_f1': 0.5039439537572714, 'eval_auc': 0.8356423774638662, 'eval_runtime': 19.9753, 'eval_samples_per_second': 71.188, 'eval_steps_per_second': 17.822, 'epoch': 40.0}
{'loss': 1.3301, 'learning_rate': 0.00059915611814346, 'epoch': 40.08}
{'loss': 1.3382, 'learning_rate': 0.0005956399437412096, 'epoch': 40.44}
{'loss': 1.3713, 'learning_rate': 0.0005921237693389591, 'epoch': 40.79}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4537014961242676, 'eval_precision': 0.5222336128460212, 'eval_recall': 0.4997808125133001, 'eval_acc': 0.4866385372714487, 'eval_mcc': 0.4139360761398735, 'eval_f1': 0.4946831738226305, 'eval_auc': 0.8377984912071057, 'eval_runtime': 18.7784, 'eval_samples_per_second': 75.725, 'eval_steps_per_second': 18.958, 'epoch': 41.0}
{'loss': 1.2875, 'learning_rate': 0.0005886075949367089, 'epoch': 41.14}
{'loss': 1.334, 'learning_rate': 0.0005850914205344585, 'epoch': 41.49}
{'loss': 1.3244, 'learning_rate': 0.0005815752461322081, 'epoch': 41.84}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4170002937316895, 'eval_precision': 0.5510711876910737, 'eval_recall': 0.5206269031176021, 'eval_acc': 0.5112517580872011, 'eval_mcc': 0.43881851609650685, 'eval_f1': 0.5256738498958297, 'eval_auc': 0.8447337588091016, 'eval_runtime': 21.5029, 'eval_samples_per_second': 66.131, 'eval_steps_per_second': 16.556, 'epoch': 42.0}
{'loss': 1.2989, 'learning_rate': 0.0005780590717299578, 'epoch': 42.19}
{'loss': 1.297, 'learning_rate': 0.0005745428973277075, 'epoch': 42.55}
{'loss': 1.3417, 'learning_rate': 0.0005710267229254571, 'epoch': 42.9}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4420814514160156, 'eval_precision': 0.5540838494339679, 'eval_recall': 0.5157850129815305, 'eval_acc': 0.510548523206751, 'eval_mcc': 0.4383030640140304, 'eval_f1': 0.5166528490117315, 'eval_auc': 0.8417538683821107, 'eval_runtime': 18.2225, 'eval_samples_per_second': 78.035, 'eval_steps_per_second': 19.536, 'epoch': 43.0}
{'loss': 1.279, 'learning_rate': 0.0005675105485232067, 'epoch': 43.25}
{'loss': 1.3258, 'learning_rate': 0.0005639943741209564, 'epoch': 43.6}
{'loss': 1.2855, 'learning_rate': 0.0005604781997187061, 'epoch': 43.95}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4592572450637817, 'eval_precision': 0.49707610739041874, 'eval_recall': 0.5164198483378707, 'eval_acc': 0.5042194092827004, 'eval_mcc': 0.4311855841297149, 'eval_f1': 0.4988678817286224, 'eval_auc': 0.8423399629996818, 'eval_runtime': 19.2391, 'eval_samples_per_second': 73.912, 'eval_steps_per_second': 18.504, 'epoch': 44.0}
{'loss': 1.2754, 'learning_rate': 0.0005569620253164557, 'epoch': 44.3}
{'loss': 1.3009, 'learning_rate': 0.0005534458509142054, 'epoch': 44.66}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4098607301712036, 'eval_precision': 0.5562793782102282, 'eval_recall': 0.5288151690189618, 'eval_acc': 0.5203938115330521, 'eval_mcc': 0.44863717012073556, 'eval_f1': 0.5377671260273851, 'eval_auc': 0.8449401675145467, 'eval_runtime': 18.3919, 'eval_samples_per_second': 77.317, 'eval_steps_per_second': 19.356, 'epoch': 45.0}
{'loss': 1.31, 'learning_rate': 0.0005499296765119551, 'epoch': 45.01}
{'loss': 1.2158, 'learning_rate': 0.0005464135021097047, 'epoch': 45.36}
{'loss': 1.3101, 'learning_rate': 0.0005428973277074542, 'epoch': 45.71}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4150826930999756, 'eval_precision': 0.5325384316380117, 'eval_recall': 0.5204086506237637, 'eval_acc': 0.5084388185654009, 'eval_mcc': 0.43695351660323634, 'eval_f1': 0.5179573583171742, 'eval_auc': 0.8488883560402918, 'eval_runtime': 19.3306, 'eval_samples_per_second': 73.562, 'eval_steps_per_second': 18.416, 'epoch': 46.0}
{'loss': 1.2513, 'learning_rate': 0.0005393811533052039, 'epoch': 46.06}
{'loss': 1.2147, 'learning_rate': 0.0005358649789029536, 'epoch': 46.41}
{'loss': 1.2552, 'learning_rate': 0.0005323488045007032, 'epoch': 46.77}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.397150993347168, 'eval_precision': 0.5600870123131, 'eval_recall': 0.5291966883884067, 'eval_acc': 0.5189873417721519, 'eval_mcc': 0.44826970893539014, 'eval_f1': 0.5374725422691762, 'eval_auc': 0.8502312824087601, 'eval_runtime': 18.3992, 'eval_samples_per_second': 77.286, 'eval_steps_per_second': 19.349, 'epoch': 47.0}
{'loss': 1.2891, 'learning_rate': 0.0005288326300984529, 'epoch': 47.12}
{'loss': 1.2608, 'learning_rate': 0.0005253164556962025, 'epoch': 47.47}
{'loss': 1.2318, 'learning_rate': 0.0005218002812939522, 'epoch': 47.82}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4091064929962158, 'eval_precision': 0.5280699035567162, 'eval_recall': 0.5228434138325466, 'eval_acc': 0.5147679324894515, 'eval_mcc': 0.4417120410343488, 'eval_f1': 0.5199558708050188, 'eval_auc': 0.8501111299433065, 'eval_runtime': 19.4325, 'eval_samples_per_second': 73.176, 'eval_steps_per_second': 18.32, 'epoch': 48.0}
{'loss': 1.2361, 'learning_rate': 0.0005182841068917018, 'epoch': 48.17}
{'loss': 1.2257, 'learning_rate': 0.0005147679324894515, 'epoch': 48.52}
{'loss': 1.2475, 'learning_rate': 0.0005112517580872012, 'epoch': 48.87}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4007015228271484, 'eval_precision': 0.5731174980010351, 'eval_recall': 0.530562549391715, 'eval_acc': 0.5218002812939522, 'eval_mcc': 0.4504338906592354, 'eval_f1': 0.5453135709390919, 'eval_auc': 0.8524344647581664, 'eval_runtime': 18.372, 'eval_samples_per_second': 77.401, 'eval_steps_per_second': 19.377, 'epoch': 49.0}
{'loss': 1.2272, 'learning_rate': 0.0005077355836849508, 'epoch': 49.23}
{'loss': 1.1934, 'learning_rate': 0.0005042194092827004, 'epoch': 49.58}
{'loss': 1.233, 'learning_rate': 0.0005007032348804501, 'epoch': 49.93}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4031349420547485, 'eval_precision': 0.5614332549040104, 'eval_recall': 0.5401989695749942, 'eval_acc': 0.5295358649789029, 'eval_mcc': 0.4588731258234439, 'eval_f1': 0.5466906359560961, 'eval_auc': 0.8521858303980938, 'eval_runtime': 19.3952, 'eval_samples_per_second': 73.317, 'eval_steps_per_second': 18.355, 'epoch': 50.0}
{'loss': 1.1929, 'learning_rate': 0.0004971870604781998, 'epoch': 50.28}
{'loss': 1.2098, 'learning_rate': 0.0004936708860759494, 'epoch': 50.63}
{'loss': 1.2438, 'learning_rate': 0.000490154711673699, 'epoch': 50.98}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3959399461746216, 'eval_precision': 0.5704086762813638, 'eval_recall': 0.5541522379369879, 'eval_acc': 0.5421940928270043, 'eval_mcc': 0.4737607400429754, 'eval_f1': 0.5593700131445998, 'eval_auc': 0.851594654304653, 'eval_runtime': 18.7081, 'eval_samples_per_second': 76.01, 'eval_steps_per_second': 19.029, 'epoch': 51.0}
{'loss': 1.1655, 'learning_rate': 0.0004866385372714487, 'epoch': 51.34}
{'loss': 1.2085, 'learning_rate': 0.0004831223628691983, 'epoch': 51.69}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.402080774307251, 'eval_precision': 0.5568673280409235, 'eval_recall': 0.5372891918507267, 'eval_acc': 0.5288326300984529, 'eval_mcc': 0.45740428821179985, 'eval_f1': 0.5419102436687004, 'eval_auc': 0.854277679668864, 'eval_runtime': 19.5444, 'eval_samples_per_second': 72.757, 'eval_steps_per_second': 18.215, 'epoch': 52.0}
{'loss': 1.2094, 'learning_rate': 0.000479606188466948, 'epoch': 52.04}
{'loss': 1.1435, 'learning_rate': 0.00047609001406469757, 'epoch': 52.39}
{'loss': 1.2013, 'learning_rate': 0.00047257383966244727, 'epoch': 52.74}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4316695928573608, 'eval_precision': 0.5504878480358374, 'eval_recall': 0.5373596898500237, 'eval_acc': 0.5274261603375527, 'eval_mcc': 0.45716776300492645, 'eval_f1': 0.5395035066081311, 'eval_auc': 0.8530083624241708, 'eval_runtime': 18.6934, 'eval_samples_per_second': 76.069, 'eval_steps_per_second': 19.044, 'epoch': 53.0}
{'loss': 1.2148, 'learning_rate': 0.00046905766526019697, 'epoch': 53.09}
{'loss': 1.1328, 'learning_rate': 0.00046554149085794655, 'epoch': 53.45}
{'loss': 1.1621, 'learning_rate': 0.00046202531645569625, 'epoch': 53.8}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3736670017242432, 'eval_precision': 0.5721069915768832, 'eval_recall': 0.5592362247512178, 'eval_acc': 0.5485232067510548, 'eval_mcc': 0.48251194977230705, 'eval_f1': 0.5576067484170963, 'eval_auc': 0.8594834669399916, 'eval_runtime': 19.9718, 'eval_samples_per_second': 71.2, 'eval_steps_per_second': 17.825, 'epoch': 54.0}
{'loss': 1.174, 'learning_rate': 0.00045850914205344584, 'epoch': 54.15}
{'loss': 1.1715, 'learning_rate': 0.00045499296765119554, 'epoch': 54.5}
{'loss': 1.1357, 'learning_rate': 0.0004514767932489451, 'epoch': 54.85}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3903346061706543, 'eval_precision': 0.584890975268281, 'eval_recall': 0.5527453046126253, 'eval_acc': 0.5436005625879043, 'eval_mcc': 0.47451399778874387, 'eval_f1': 0.5650687551862751, 'eval_auc': 0.8572266339161682, 'eval_runtime': 18.7758, 'eval_samples_per_second': 75.736, 'eval_steps_per_second': 18.961, 'epoch': 55.0}
{'loss': 1.1106, 'learning_rate': 0.0004479606188466948, 'epoch': 55.2}
{'loss': 1.1233, 'learning_rate': 0.0004444444444444444, 'epoch': 55.56}
{'loss': 1.1513, 'learning_rate': 0.0004409282700421941, 'epoch': 55.91}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3806627988815308, 'eval_precision': 0.5763259895650068, 'eval_recall': 0.5490689417170848, 'eval_acc': 0.5407876230661041, 'eval_mcc': 0.4717861396481543, 'eval_f1': 0.557103143703263, 'eval_auc': 0.8568770324921399, 'eval_runtime': 19.8117, 'eval_samples_per_second': 71.776, 'eval_steps_per_second': 17.969, 'epoch': 56.0}
{'loss': 1.1291, 'learning_rate': 0.00043741209563994375, 'epoch': 56.26}
{'loss': 1.1147, 'learning_rate': 0.0004338959212376934, 'epoch': 56.61}
{'loss': 1.1403, 'learning_rate': 0.0004303797468354431, 'epoch': 56.96}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4324883222579956, 'eval_precision': 0.5682335171996793, 'eval_recall': 0.5489389283913968, 'eval_acc': 0.5372714486638537, 'eval_mcc': 0.46799134478502363, 'eval_f1': 0.5562130326355319, 'eval_auc': 0.8588029920873368, 'eval_runtime': 19.1256, 'eval_samples_per_second': 74.351, 'eval_steps_per_second': 18.614, 'epoch': 57.0}
{'loss': 1.1077, 'learning_rate': 0.0004268635724331927, 'epoch': 57.31}
{'loss': 1.1616, 'learning_rate': 0.0004233473980309424, 'epoch': 57.67}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3717366456985474, 'eval_precision': 0.5836390191078297, 'eval_recall': 0.563750289080876, 'eval_acc': 0.5576652601969058, 'eval_mcc': 0.4905610364487135, 'eval_f1': 0.5704170585293276, 'eval_auc': 0.8607853146187091, 'eval_runtime': 19.9484, 'eval_samples_per_second': 71.284, 'eval_steps_per_second': 17.846, 'epoch': 58.0}
{'loss': 1.1219, 'learning_rate': 0.00041983122362869197, 'epoch': 58.02}
{'loss': 1.0636, 'learning_rate': 0.00041631504922644166, 'epoch': 58.37}
{'loss': 1.1316, 'learning_rate': 0.00041279887482419125, 'epoch': 58.72}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.402068018913269, 'eval_precision': 0.5679970308703302, 'eval_recall': 0.5567142549680024, 'eval_acc': 0.5457102672292545, 'eval_mcc': 0.47728795346906894, 'eval_f1': 0.5600986877885694, 'eval_auc': 0.8596065783106142, 'eval_runtime': 18.9484, 'eval_samples_per_second': 75.046, 'eval_steps_per_second': 18.788, 'epoch': 59.0}
{'loss': 1.1297, 'learning_rate': 0.00040928270042194095, 'epoch': 59.07}
{'loss': 1.0984, 'learning_rate': 0.0004057665260196906, 'epoch': 59.42}
{'loss': 1.0955, 'learning_rate': 0.00040225035161744023, 'epoch': 59.77}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4144920110702515, 'eval_precision': 0.5680763488295015, 'eval_recall': 0.5586104678380778, 'eval_acc': 0.5492264416315049, 'eval_mcc': 0.48111074802106263, 'eval_f1': 0.5614839002448276, 'eval_auc': 0.8585087003451459, 'eval_runtime': 19.5074, 'eval_samples_per_second': 72.895, 'eval_steps_per_second': 18.25, 'epoch': 60.0}
{'loss': 1.0649, 'learning_rate': 0.0003987341772151899, 'epoch': 60.13}
{'loss': 1.0921, 'learning_rate': 0.0003952180028129395, 'epoch': 60.48}
{'loss': 1.0588, 'learning_rate': 0.00039170182841068916, 'epoch': 60.83}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.396018385887146, 'eval_precision': 0.5738660249784546, 'eval_recall': 0.5568656398972887, 'eval_acc': 0.5457102672292545, 'eval_mcc': 0.4795036202444034, 'eval_f1': 0.5563808167447788, 'eval_auc': 0.8619478506784567, 'eval_runtime': 18.5849, 'eval_samples_per_second': 76.514, 'eval_steps_per_second': 19.155, 'epoch': 61.0}
{'loss': 1.0691, 'learning_rate': 0.0003881856540084388, 'epoch': 61.18}
{'loss': 1.0342, 'learning_rate': 0.0003846694796061885, 'epoch': 61.53}
{'loss': 1.1237, 'learning_rate': 0.00038115330520393815, 'epoch': 61.88}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.385538101196289, 'eval_precision': 0.5671563136931347, 'eval_recall': 0.5632516783471461, 'eval_acc': 0.5520393811533052, 'eval_mcc': 0.4847070954056545, 'eval_f1': 0.5621985841635808, 'eval_auc': 0.8656512667983363, 'eval_runtime': 19.0613, 'eval_samples_per_second': 74.601, 'eval_steps_per_second': 18.677, 'epoch': 62.0}
{'loss': 1.044, 'learning_rate': 0.0003776371308016878, 'epoch': 62.24}
{'loss': 1.0725, 'learning_rate': 0.00037412095639943743, 'epoch': 62.59}
{'loss': 1.0912, 'learning_rate': 0.0003706047819971871, 'epoch': 62.94}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4136097431182861, 'eval_precision': 0.5867875241311513, 'eval_recall': 0.5705980440176163, 'eval_acc': 0.5590717299578059, 'eval_mcc': 0.49280499603383526, 'eval_f1': 0.5763200965990439, 'eval_auc': 0.8645751671042419, 'eval_runtime': 18.5794, 'eval_samples_per_second': 76.536, 'eval_steps_per_second': 19.161, 'epoch': 63.0}
{'loss': 1.0138, 'learning_rate': 0.0003670886075949367, 'epoch': 63.29}
{'loss': 1.0442, 'learning_rate': 0.00036357243319268636, 'epoch': 63.64}
{'loss': 1.0606, 'learning_rate': 0.000360056258790436, 'epoch': 63.99}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.410220742225647, 'eval_precision': 0.5829309409127112, 'eval_recall': 0.5647006416899556, 'eval_acc': 0.5520393811533052, 'eval_mcc': 0.48756755698953214, 'eval_f1': 0.5642305768758091, 'eval_auc': 0.86470485180073, 'eval_runtime': 19.701, 'eval_samples_per_second': 72.179, 'eval_steps_per_second': 18.07, 'epoch': 64.0}
{'loss': 1.017, 'learning_rate': 0.00035654008438818565, 'epoch': 64.35}
{'loss': 1.0627, 'learning_rate': 0.0003530239099859353, 'epoch': 64.7}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3874014616012573, 'eval_precision': 0.5796813795127127, 'eval_recall': 0.5662857001338418, 'eval_acc': 0.5576652601969058, 'eval_mcc': 0.4905578800973145, 'eval_f1': 0.571388446212475, 'eval_auc': 0.8630550690632991, 'eval_runtime': 17.6048, 'eval_samples_per_second': 80.773, 'eval_steps_per_second': 20.222, 'epoch': 65.0}
{'loss': 1.0515, 'learning_rate': 0.000349507735583685, 'epoch': 65.05}
{'loss': 0.9331, 'learning_rate': 0.0003459915611814346, 'epoch': 65.4}
{'loss': 1.0457, 'learning_rate': 0.00034247538677918427, 'epoch': 65.75}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.3949052095413208, 'eval_precision': 0.6032362077502527, 'eval_recall': 0.5754341149558743, 'eval_acc': 0.5675105485232067, 'eval_mcc': 0.5018348228799606, 'eval_f1': 0.5869362949432788, 'eval_auc': 0.862965506638434, 'eval_runtime': 17.6579, 'eval_samples_per_second': 80.53, 'eval_steps_per_second': 20.161, 'epoch': 66.0}
{'loss': 1.0077, 'learning_rate': 0.0003389592123769339, 'epoch': 66.1}
{'loss': 0.9724, 'learning_rate': 0.00033544303797468356, 'epoch': 66.46}
{'loss': 1.0023, 'learning_rate': 0.0003319268635724332, 'epoch': 66.81}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4067778587341309, 'eval_precision': 0.5850429276358889, 'eval_recall': 0.5702701536651491, 'eval_acc': 0.5632911392405063, 'eval_mcc': 0.4968758293029953, 'eval_f1': 0.5744577916084374, 'eval_auc': 0.8650168035927892, 'eval_runtime': 18.6851, 'eval_samples_per_second': 76.103, 'eval_steps_per_second': 19.053, 'epoch': 67.0}
{'loss': 1.0253, 'learning_rate': 0.00032841068917018284, 'epoch': 67.16}
{'loss': 0.9993, 'learning_rate': 0.0003248945147679325, 'epoch': 67.51}
{'loss': 0.9981, 'learning_rate': 0.00032137834036568213, 'epoch': 67.86}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.44417142868042, 'eval_precision': 0.5850753249536937, 'eval_recall': 0.5626608341654511, 'eval_acc': 0.5548523206751055, 'eval_mcc': 0.4875404554414611, 'eval_f1': 0.569860875088778, 'eval_auc': 0.8626981580183911, 'eval_runtime': 17.6199, 'eval_samples_per_second': 80.704, 'eval_steps_per_second': 20.204, 'epoch': 68.0}
{'loss': 0.9685, 'learning_rate': 0.0003178621659634318, 'epoch': 68.21}
{'loss': 0.9748, 'learning_rate': 0.0003143459915611814, 'epoch': 68.57}
{'loss': 1.0155, 'learning_rate': 0.0003108298171589311, 'epoch': 68.92}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4529811143875122, 'eval_precision': 0.5812734125874974, 'eval_recall': 0.571287923955702, 'eval_acc': 0.5618846694796061, 'eval_mcc': 0.49586506394246577, 'eval_f1': 0.5738355575896112, 'eval_auc': 0.8631819279320286, 'eval_runtime': 17.544, 'eval_samples_per_second': 81.053, 'eval_steps_per_second': 20.292, 'epoch': 69.0}
{'loss': 0.9508, 'learning_rate': 0.0003073136427566807, 'epoch': 69.27}
{'loss': 0.9645, 'learning_rate': 0.0003037974683544304, 'epoch': 69.62}
{'loss': 0.9712, 'learning_rate': 0.00030028129395218, 'epoch': 69.97}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4401381015777588, 'eval_precision': 0.5662986729646036, 'eval_recall': 0.5579179876585831, 'eval_acc': 0.5485232067510548, 'eval_mcc': 0.47998687290931624, 'eval_f1': 0.5608900417193325, 'eval_auc': 0.8643600129399486, 'eval_runtime': 18.2238, 'eval_samples_per_second': 78.03, 'eval_steps_per_second': 19.535, 'epoch': 70.0}
{'loss': 0.9446, 'learning_rate': 0.0002967651195499297, 'epoch': 70.32}
{'loss': 0.9948, 'learning_rate': 0.0002932489451476794, 'epoch': 70.68}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.45790433883667, 'eval_precision': 0.5743236229945468, 'eval_recall': 0.5646745195100148, 'eval_acc': 0.5562587904360057, 'eval_mcc': 0.48933725492706137, 'eval_f1': 0.5663183220278614, 'eval_auc': 0.8631147963428847, 'eval_runtime': 17.3797, 'eval_samples_per_second': 81.819, 'eval_steps_per_second': 20.484, 'epoch': 71.0}
{'loss': 0.9694, 'learning_rate': 0.00028973277074542897, 'epoch': 71.03}
{'loss': 0.9274, 'learning_rate': 0.00028621659634317867, 'epoch': 71.38}
{'loss': 0.9351, 'learning_rate': 0.00028270042194092826, 'epoch': 71.73}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4167894124984741, 'eval_precision': 0.5716827239301987, 'eval_recall': 0.568389070480362, 'eval_acc': 0.5590717299578059, 'eval_mcc': 0.4924002409820078, 'eval_f1': 0.5685523586925944, 'eval_auc': 0.8675649492838893, 'eval_runtime': 17.5553, 'eval_samples_per_second': 81.001, 'eval_steps_per_second': 20.279, 'epoch': 72.0}
{'loss': 0.9517, 'learning_rate': 0.00027918424753867795, 'epoch': 72.08}
{'loss': 0.9176, 'learning_rate': 0.00027566807313642754, 'epoch': 72.43}
{'loss': 0.9101, 'learning_rate': 0.00027215189873417724, 'epoch': 72.78}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4532585144042969, 'eval_precision': 0.5757340933778036, 'eval_recall': 0.567455879788302, 'eval_acc': 0.5583684950773559, 'eval_mcc': 0.4918522750604743, 'eval_f1': 0.5687503085232429, 'eval_auc': 0.8654987085960567, 'eval_runtime': 18.0987, 'eval_samples_per_second': 78.569, 'eval_steps_per_second': 19.67, 'epoch': 73.0}
{'loss': 0.9183, 'learning_rate': 0.00026863572433192683, 'epoch': 73.14}
{'loss': 0.9137, 'learning_rate': 0.0002651195499296765, 'epoch': 73.49}
{'loss': 0.9378, 'learning_rate': 0.00026160337552742617, 'epoch': 73.84}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4113566875457764, 'eval_precision': 0.5846051614441925, 'eval_recall': 0.5805987655196152, 'eval_acc': 0.570323488045007, 'eval_mcc': 0.5050338856274521, 'eval_f1': 0.5820006990308759, 'eval_auc': 0.8678866807264202, 'eval_runtime': 17.6099, 'eval_samples_per_second': 80.75, 'eval_steps_per_second': 20.216, 'epoch': 74.0}
{'loss': 0.9208, 'learning_rate': 0.0002580872011251758, 'epoch': 74.19}
{'loss': 0.8978, 'learning_rate': 0.0002545710267229255, 'epoch': 74.54}
{'loss': 0.9161, 'learning_rate': 0.0002510548523206751, 'epoch': 74.89}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4556642770767212, 'eval_precision': 0.5848487305975206, 'eval_recall': 0.5742150118811425, 'eval_acc': 0.5639943741209564, 'eval_mcc': 0.4991644316966354, 'eval_f1': 0.5746425083331057, 'eval_auc': 0.867301099209936, 'eval_runtime': 17.5633, 'eval_samples_per_second': 80.964, 'eval_steps_per_second': 20.269, 'epoch': 75.0}
{'loss': 0.902, 'learning_rate': 0.0002475386779184248, 'epoch': 75.25}
{'loss': 0.8874, 'learning_rate': 0.0002440225035161744, 'epoch': 75.6}
{'loss': 0.8912, 'learning_rate': 0.00024050632911392405, 'epoch': 75.95}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.478397011756897, 'eval_precision': 0.5817894232372324, 'eval_recall': 0.5729655421673842, 'eval_acc': 0.5639943741209564, 'eval_mcc': 0.4978715236289024, 'eval_f1': 0.5750804749281574, 'eval_auc': 0.8669048248075446, 'eval_runtime': 18.6198, 'eval_samples_per_second': 76.37, 'eval_steps_per_second': 19.119, 'epoch': 76.0}
{'loss': 0.8661, 'learning_rate': 0.0002369901547116737, 'epoch': 76.3}
{'loss': 0.8537, 'learning_rate': 0.00023347398030942334, 'epoch': 76.65}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.443872094154358, 'eval_precision': 0.5794230939129303, 'eval_recall': 0.5709941290486276, 'eval_acc': 0.559774964838256, 'eval_mcc': 0.4948923175642213, 'eval_f1': 0.5693216620710844, 'eval_auc': 0.8673580184428918, 'eval_runtime': 17.6049, 'eval_samples_per_second': 80.773, 'eval_steps_per_second': 20.222, 'epoch': 77.0}
{'loss': 0.9139, 'learning_rate': 0.000229957805907173, 'epoch': 77.0}
{'loss': 0.8831, 'learning_rate': 0.00022644163150492265, 'epoch': 77.36}
{'loss': 0.862, 'learning_rate': 0.0002229254571026723, 'epoch': 77.71}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4741461277008057, 'eval_precision': 0.5945997111399332, 'eval_recall': 0.5759662471133533, 'eval_acc': 0.5654008438818565, 'eval_mcc': 0.49993413860154584, 'eval_f1': 0.5835054498224417, 'eval_auc': 0.8660665851902823, 'eval_runtime': 17.6645, 'eval_samples_per_second': 80.5, 'eval_steps_per_second': 20.153, 'epoch': 78.0}
{'loss': 0.8711, 'learning_rate': 0.00021940928270042196, 'epoch': 78.06}
{'loss': 0.8292, 'learning_rate': 0.0002158931082981716, 'epoch': 78.41}
{'loss': 0.8432, 'learning_rate': 0.00021237693389592125, 'epoch': 78.76}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4262911081314087, 'eval_precision': 0.5883069232913553, 'eval_recall': 0.576017526255379, 'eval_acc': 0.5668073136427567, 'eval_mcc': 0.501904234067827, 'eval_f1': 0.5803282696079268, 'eval_auc': 0.8712003252741624, 'eval_runtime': 18.4236, 'eval_samples_per_second': 77.183, 'eval_steps_per_second': 19.323, 'epoch': 79.0}
{'loss': 0.8757, 'learning_rate': 0.0002088607594936709, 'epoch': 79.11}
{'loss': 0.7985, 'learning_rate': 0.00020534458509142054, 'epoch': 79.47}
{'loss': 0.863, 'learning_rate': 0.00020182841068917018, 'epoch': 79.82}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4831944704055786, 'eval_precision': 0.5833554006315956, 'eval_recall': 0.5762303608446364, 'eval_acc': 0.5646976090014064, 'eval_mcc': 0.501716041422338, 'eval_f1': 0.5721748755960883, 'eval_auc': 0.8683073043325832, 'eval_runtime': 17.6028, 'eval_samples_per_second': 80.782, 'eval_steps_per_second': 20.224, 'epoch': 80.0}
{'loss': 0.8503, 'learning_rate': 0.00019831223628691982, 'epoch': 80.17}
{'loss': 0.8384, 'learning_rate': 0.00019479606188466946, 'epoch': 80.52}
{'loss': 0.8122, 'learning_rate': 0.0001912798874824191, 'epoch': 80.87}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5045512914657593, 'eval_precision': 0.5988916148364706, 'eval_recall': 0.5781530863861657, 'eval_acc': 0.5675105485232067, 'eval_mcc': 0.5040737208346378, 'eval_f1': 0.582407784786866, 'eval_auc': 0.8674600067600029, 'eval_runtime': 17.7293, 'eval_samples_per_second': 80.206, 'eval_steps_per_second': 20.08, 'epoch': 81.0}
{'loss': 0.8144, 'learning_rate': 0.0001877637130801688, 'epoch': 81.22}
{'loss': 0.8228, 'learning_rate': 0.00018424753867791845, 'epoch': 81.58}
{'loss': 0.8225, 'learning_rate': 0.0001807313642756681, 'epoch': 81.93}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4650161266326904, 'eval_precision': 0.5904788012279937, 'eval_recall': 0.5777536557070242, 'eval_acc': 0.5689170182841069, 'eval_mcc': 0.503450705022749, 'eval_f1': 0.5825244355893044, 'eval_auc': 0.8687680173212968, 'eval_runtime': 18.4061, 'eval_samples_per_second': 77.257, 'eval_steps_per_second': 19.341, 'epoch': 82.0}
{'loss': 0.841, 'learning_rate': 0.00017721518987341773, 'epoch': 82.28}
{'loss': 0.7544, 'learning_rate': 0.00017369901547116738, 'epoch': 82.63}
{'loss': 0.8177, 'learning_rate': 0.00017018284106891702, 'epoch': 82.98}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.486862301826477, 'eval_precision': 0.591855025464564, 'eval_recall': 0.5840248066956901, 'eval_acc': 0.5731364275668073, 'eval_mcc': 0.509467722658299, 'eval_f1': 0.5858889132894637, 'eval_auc': 0.8700378124797131, 'eval_runtime': 17.63, 'eval_samples_per_second': 80.658, 'eval_steps_per_second': 20.193, 'epoch': 83.0}
{'loss': 0.787, 'learning_rate': 0.00016666666666666666, 'epoch': 83.33}
{'loss': 0.8001, 'learning_rate': 0.0001631504922644163, 'epoch': 83.68}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.495125412940979, 'eval_precision': 0.5789517077553431, 'eval_recall': 0.5664281953582266, 'eval_acc': 0.5576652601969058, 'eval_mcc': 0.49075455291938236, 'eval_f1': 0.5703866234960228, 'eval_auc': 0.8679361978264343, 'eval_runtime': 17.5032, 'eval_samples_per_second': 81.242, 'eval_steps_per_second': 20.339, 'epoch': 84.0}
{'loss': 0.8142, 'learning_rate': 0.00015963431786216597, 'epoch': 84.04}
{'loss': 0.7575, 'learning_rate': 0.00015611814345991562, 'epoch': 84.39}
{'loss': 0.788, 'learning_rate': 0.00015260196905766526, 'epoch': 84.74}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4942790269851685, 'eval_precision': 0.5944234698361796, 'eval_recall': 0.5743202202420583, 'eval_acc': 0.5639943741209564, 'eval_mcc': 0.4991136705343948, 'eval_f1': 0.5803183692369547, 'eval_auc': 0.8682998644649744, 'eval_runtime': 18.5114, 'eval_samples_per_second': 76.818, 'eval_steps_per_second': 19.231, 'epoch': 85.0}
{'loss': 0.7996, 'learning_rate': 0.0001490857946554149, 'epoch': 85.09}
{'loss': 0.775, 'learning_rate': 0.00014556962025316455, 'epoch': 85.44}
{'loss': 0.7501, 'learning_rate': 0.00014205344585091422, 'epoch': 85.79}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5373444557189941, 'eval_precision': 0.5952318116463217, 'eval_recall': 0.5815531767673775, 'eval_acc': 0.5731364275668073, 'eval_mcc': 0.5086961349429062, 'eval_f1': 0.5860403943152593, 'eval_auc': 0.8675155287152246, 'eval_runtime': 17.6485, 'eval_samples_per_second': 80.573, 'eval_steps_per_second': 20.172, 'epoch': 86.0}
{'loss': 0.7654, 'learning_rate': 0.00013853727144866386, 'epoch': 86.15}
{'loss': 0.7583, 'learning_rate': 0.0001350210970464135, 'epoch': 86.5}
{'loss': 0.8035, 'learning_rate': 0.00013150492264416317, 'epoch': 86.85}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5052584409713745, 'eval_precision': 0.6033750812938433, 'eval_recall': 0.5927022207603821, 'eval_acc': 0.5843881856540084, 'eval_mcc': 0.5218480256244703, 'eval_f1': 0.5966617781513737, 'eval_auc': 0.8690873352954325, 'eval_runtime': 17.5517, 'eval_samples_per_second': 81.018, 'eval_steps_per_second': 20.283, 'epoch': 87.0}
{'loss': 0.7241, 'learning_rate': 0.00012798874824191281, 'epoch': 87.2}
{'loss': 0.7746, 'learning_rate': 0.00012447257383966246, 'epoch': 87.55}
{'loss': 0.7418, 'learning_rate': 0.0001209563994374121, 'epoch': 87.9}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5178202390670776, 'eval_precision': 0.5936970766700305, 'eval_recall': 0.5822614870592451, 'eval_acc': 0.5738396624472574, 'eval_mcc': 0.509656885121621, 'eval_f1': 0.5869285380564108, 'eval_auc': 0.8696702169404553, 'eval_runtime': 18.5302, 'eval_samples_per_second': 76.74, 'eval_steps_per_second': 19.212, 'epoch': 88.0}
{'loss': 0.7323, 'learning_rate': 0.00011744022503516174, 'epoch': 88.26}
{'loss': 0.7194, 'learning_rate': 0.00011392405063291139, 'epoch': 88.61}
{'loss': 0.7761, 'learning_rate': 0.00011040787623066104, 'epoch': 88.96}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.4847440719604492, 'eval_precision': 0.5927350174588568, 'eval_recall': 0.5855171922345059, 'eval_acc': 0.5780590717299579, 'eval_mcc': 0.5140578869541874, 'eval_f1': 0.5883085558277157, 'eval_auc': 0.8716793938683434, 'eval_runtime': 17.5642, 'eval_samples_per_second': 80.96, 'eval_steps_per_second': 20.269, 'epoch': 89.0}
{'loss': 0.7433, 'learning_rate': 0.0001068917018284107, 'epoch': 89.31}
{'loss': 0.7178, 'learning_rate': 0.00010337552742616034, 'epoch': 89.66}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5410324335098267, 'eval_precision': 0.6075461265023191, 'eval_recall': 0.5926492834938788, 'eval_acc': 0.5843881856540084, 'eval_mcc': 0.5217624166417272, 'eval_f1': 0.5987302417596223, 'eval_auc': 0.8697573950080463, 'eval_runtime': 17.602, 'eval_samples_per_second': 80.786, 'eval_steps_per_second': 20.225, 'epoch': 90.0}
{'loss': 0.7172, 'learning_rate': 9.985935302390998e-05, 'epoch': 90.01}
{'loss': 0.7102, 'learning_rate': 9.634317862165963e-05, 'epoch': 90.37}
{'loss': 0.6895, 'learning_rate': 9.282700421940928e-05, 'epoch': 90.72}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.535949945449829, 'eval_precision': 0.5959035485487196, 'eval_recall': 0.5878152085063479, 'eval_acc': 0.5780590717299579, 'eval_mcc': 0.515464078654635, 'eval_f1': 0.5891871655967333, 'eval_auc': 0.8716879859461903, 'eval_runtime': 18.5009, 'eval_samples_per_second': 76.861, 'eval_steps_per_second': 19.242, 'epoch': 91.0}
{'loss': 0.7072, 'learning_rate': 8.931082981715894e-05, 'epoch': 91.07}
{'loss': 0.7137, 'learning_rate': 8.579465541490858e-05, 'epoch': 91.42}
{'loss': 0.7254, 'learning_rate': 8.227848101265823e-05, 'epoch': 91.77}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5600769519805908, 'eval_precision': 0.5917703977426229, 'eval_recall': 0.5859911993052299, 'eval_acc': 0.5766526019690577, 'eval_mcc': 0.5125834064910659, 'eval_f1': 0.5879974143116766, 'eval_auc': 0.8715369321982481, 'eval_runtime': 17.6248, 'eval_samples_per_second': 80.682, 'eval_steps_per_second': 20.199, 'epoch': 92.0}
{'loss': 0.6563, 'learning_rate': 7.876230661040788e-05, 'epoch': 92.12}
{'loss': 0.6907, 'learning_rate': 7.524613220815753e-05, 'epoch': 92.48}
{'loss': 0.7044, 'learning_rate': 7.172995780590717e-05, 'epoch': 92.83}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5286455154418945, 'eval_precision': 0.6108615531304717, 'eval_recall': 0.5944860899246196, 'eval_acc': 0.5857946554149086, 'eval_mcc': 0.5236982770843828, 'eval_f1': 0.6003558012113177, 'eval_auc': 0.8704557545596614, 'eval_runtime': 17.5777, 'eval_samples_per_second': 80.898, 'eval_steps_per_second': 20.253, 'epoch': 93.0}
{'loss': 0.7211, 'learning_rate': 6.821378340365681e-05, 'epoch': 93.18}
{'loss': 0.6577, 'learning_rate': 6.469760900140648e-05, 'epoch': 93.53}
{'loss': 0.7308, 'learning_rate': 6.118143459915612e-05, 'epoch': 93.88}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5428887605667114, 'eval_precision': 0.6078396071693346, 'eval_recall': 0.5948365940013316, 'eval_acc': 0.5843881856540084, 'eval_mcc': 0.5226948172731368, 'eval_f1': 0.5984464428638903, 'eval_auc': 0.871448536087463, 'eval_runtime': 18.4764, 'eval_samples_per_second': 76.963, 'eval_steps_per_second': 19.268, 'epoch': 94.0}
{'loss': 0.6925, 'learning_rate': 5.766526019690577e-05, 'epoch': 94.23}
{'loss': 0.6851, 'learning_rate': 5.414908579465542e-05, 'epoch': 94.59}
{'loss': 0.6548, 'learning_rate': 5.0632911392405066e-05, 'epoch': 94.94}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.542696475982666, 'eval_precision': 0.6001564239927355, 'eval_recall': 0.591352765917101, 'eval_acc': 0.5822784810126582, 'eval_mcc': 0.5194403835097205, 'eval_f1': 0.5945399102600201, 'eval_auc': 0.8727060321716973, 'eval_runtime': 17.6469, 'eval_samples_per_second': 80.581, 'eval_steps_per_second': 20.174, 'epoch': 95.0}
{'loss': 0.6357, 'learning_rate': 4.7116736990154716e-05, 'epoch': 95.29}
{'loss': 0.7038, 'learning_rate': 4.360056258790436e-05, 'epoch': 95.64}
{'loss': 0.658, 'learning_rate': 4.0084388185654015e-05, 'epoch': 95.99}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5528651475906372, 'eval_precision': 0.6018964470618813, 'eval_recall': 0.5926533267392855, 'eval_acc': 0.5822784810126582, 'eval_mcc': 0.5193752472906814, 'eval_f1': 0.5961082907776647, 'eval_auc': 0.8731579612339219, 'eval_runtime': 17.9715, 'eval_samples_per_second': 79.125, 'eval_steps_per_second': 19.809, 'epoch': 96.0}
{'loss': 0.6757, 'learning_rate': 3.656821378340366e-05, 'epoch': 96.34}
{'loss': 0.6723, 'learning_rate': 3.305203938115331e-05, 'epoch': 96.69}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5665769577026367, 'eval_precision': 0.5914429802433749, 'eval_recall': 0.5834807079619061, 'eval_acc': 0.5731364275668073, 'eval_mcc': 0.5091071707991321, 'eval_f1': 0.5859134241369199, 'eval_auc': 0.8719116101040888, 'eval_runtime': 18.4085, 'eval_samples_per_second': 77.247, 'eval_steps_per_second': 19.339, 'epoch': 97.0}
{'loss': 0.6359, 'learning_rate': 2.9535864978902954e-05, 'epoch': 97.05}
{'loss': 0.6552, 'learning_rate': 2.6019690576652604e-05, 'epoch': 97.4}
{'loss': 0.6681, 'learning_rate': 2.250351617440225e-05, 'epoch': 97.75}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.5610990524291992, 'eval_precision': 0.6067347478949163, 'eval_recall': 0.598045498751123, 'eval_acc': 0.5879043600562588, 'eval_mcc': 0.5259325938400049, 'eval_f1': 0.6013933782305144, 'eval_auc': 0.8729865288452804, 'eval_runtime': 16.79, 'eval_samples_per_second': 84.693, 'eval_steps_per_second': 21.203, 'epoch': 98.0}
{'loss': 0.6231, 'learning_rate': 1.89873417721519e-05, 'epoch': 98.1}
{'loss': 0.627, 'learning_rate': 1.547116736990155e-05, 'epoch': 98.45}
{'loss': 0.6549, 'learning_rate': 1.1954992967651194e-05, 'epoch': 98.8}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.554926872253418, 'eval_precision': 0.6047222519817455, 'eval_recall': 0.5949037559209938, 'eval_acc': 0.5857946554149086, 'eval_mcc': 0.5234002979983099, 'eval_f1': 0.5986617317788157, 'eval_auc': 0.8730731281612797, 'eval_runtime': 16.1538, 'eval_samples_per_second': 88.029, 'eval_steps_per_second': 22.038, 'epoch': 99.0}
{'loss': 0.6633, 'learning_rate': 8.438818565400844e-06, 'epoch': 99.16}
{'loss': 0.6443, 'learning_rate': 4.922644163150493e-06, 'epoch': 99.51}
{'loss': 0.6182, 'learning_rate': 1.4064697609001406e-06, 'epoch': 99.86}


  0%|          | 0/356 [00:00<?, ?it/s]

{'eval_loss': 1.555328607559204, 'eval_precision': 0.603934440043556, 'eval_recall': 0.5969301408502801, 'eval_acc': 0.5879043600562588, 'eval_mcc': 0.5258677780142589, 'eval_f1': 0.5993309151927436, 'eval_auc': 0.8731178782697037, 'eval_runtime': 17.0582, 'eval_samples_per_second': 83.361, 'eval_steps_per_second': 20.87, 'epoch': 100.0}
{'train_runtime': 16031.1187, 'train_samples_per_second': 35.462, 'train_steps_per_second': 8.87, 'train_loss': 1.2234708714652833, 'epoch': 100.0}


In [None]:
input = tokenizer.encode(malware_calls.iloc[0]['api'], return_tensors="pt")
input = input.to(torch.device("cuda"))
logits = peft_model(input).logits
prediction = torch.argmax(logits)
print(malware_calls.iloc[0]['class'] + " - " + IDX2CAT[prediction.tolist()])

Trojan - Downloader


In [None]:
input = tokenizer.encode(malware_calls.iloc[0]['api'], return_tensors="pt")
input = input.to(torch.device("cuda"))
logits = peft_model(input).logits
prediction = torch.max(logits, 1).indices
print(malware_calls.iloc[0]['class'] + " - " + IDX2CAT[prediction.tolist()[0]])

In [None]:
print("hello")