# 数据准备

In [1]:
from datasets import load_dataset

data = load_dataset("cardiffnlp/tweet_sentiment_multilingual/data/english")
data["val"] = data["validation"]
del data["validation"]

In [2]:
import pandas as pd
data["train"].to_pandas().label.value_counts()

label
0    613
1    613
2    613
Name: count, dtype: int64

In [3]:
# 字符数
max_char = data['train'].to_pandas()['text'].str.len().max()
print(f"The maximum number of characters is {max_char}")
# 词数
max_words = data['train'].to_pandas()['text'].str.split().str.len().max()
print(f"The maximum number of words is {max_words}")

The maximum number of characters is 185
The maximum number of words is 33


## llama tokenizer

In [4]:
from transformers import AutoTokenizer, DataCollatorWithPadding
llama_path = "./Meta-Llama-3.1-8B"
llama_tokenizer = AutoTokenizer.from_pretrained(llama_path, add_prefix_space=True)
llama_tokenizer.pad_token_id = llama_tokenizer.eos_token_id
llama_tokenizer.pad_token = llama_tokenizer.eos_token
def preprocessing_function(examples):
    examples['label'] = [int(i) for i in examples['label']]
    return llama_tokenizer(examples['text'], truncation=True, padding='max_length', max_length=64)

tokenized_data = data.map(preprocessing_function, batched=True, remove_columns= ["text"])
tokenized_data.set_format("torch")
# 创建 DataCollatorWithPadding 实例
data_collator = DataCollatorWithPadding(tokenizer=llama_tokenizer)


In [5]:
tokenized_data["train"]

Dataset({
    features: ['label', 'input_ids', 'attention_mask'],
    num_rows: 1839
})

## llama model

In [6]:
from transformers import AutoModelForSequenceClassification
import torch

pretrain_model = AutoModelForSequenceClassification.from_pretrained(llama_path, 
                                                                 num_labels=3,
                                                                device_map="auto",
                                                                offload_folder="offload",
                                                                trust_remote_code=True)
pretrain_model.config.pad_token_id = llama_tokenizer.pad_token_id
# llama_model.config.use_cache = False
# llama_model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at ./Meta-Llama-3.1-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
model_dtype = next(pretrain_model.parameters()).dtype
print(f"Model dtype: {model_dtype}")

Model dtype: torch.float32


## LoRA 配置

In [24]:
from peft import get_peft_model, LoraConfig, TaskType
lora_alpha = 8
lora_rank = 4


lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, 
    r=lora_rank, 
    lora_alpha=lora_alpha, 
    lora_dropout=0.05, 
    bias="none",
    target_modules=[
       "q_proj",  "v_proj"
    ],
)

llama_model = get_peft_model(pretrain_model, lora_config)
llama_model.print_trainable_parameters()

trainable params: 6,959,104 || all params: 7,511,896,064 || trainable%: 0.0926


## 评估指标

In [8]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # eval_pred 是模型返回的预测值和实际值元组
    predictions = np.argmax(logits, axis=-1)
    
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')
    f1 = f1_score(labels, predictions, average='macro')
    accuracy = accuracy_score(labels, predictions)
    
    # 返回包含所有指标的字典
    return {"precision": precision, "recall": recall, "f1-score": f1, "accuracy": accuracy}


In [26]:
from transformers import TrainingArguments, Trainer

lr = 1e-4
batch_size = 8
num_epochs = 5

training_args = TrainingArguments(
    output_dir=f"./results/lr={lr}_lora_alpha={lora_alpha}_lora_r={lora_rank}",
    learning_rate=lr,
    lr_scheduler_type= "constant",
    warmup_ratio= 0.1,
    max_grad_norm= 0.3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.001,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2, 
    load_best_model_at_end=True,
    report_to="wandb",
    fp16=True,
    gradient_checkpointing=True,
)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [27]:
trainer = Trainer(
    model=pretrain_model,
    args=training_args,
    train_dataset=tokenized_data['train'],
    eval_dataset=tokenized_data['val'],
    tokenizer=llama_tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [23]:
# 开始训练

trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.699357,0.717789,0.70679,0.71019,0.70679
2,No log,0.833159,0.685659,0.688272,0.655947,0.688272
3,0.871600,0.751952,0.761451,0.762346,0.761073,0.762346
4,0.871600,1.516374,0.711256,0.709877,0.708075,0.709877
5,0.272300,2.267151,0.710639,0.70679,0.704869,0.70679


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


TrainOutput(global_step=1150, training_loss=0.5121667812181556, metrics={'train_runtime': 631.7992, 'train_samples_per_second': 14.554, 'train_steps_per_second': 1.82, 'total_flos': 2.466870316498944e+16, 'train_loss': 0.5121667812181556, 'epoch': 5.0})

In [10]:
from transformers import TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType

batch_size = 8
num_epochs = 10
lr_pars = [1e-05]
lora_ranks = [4,8,16,32]
lora_alphas = [16,32,64]

for lr in lr_pars:
    for lora_rank in lora_ranks:
        for lora_alpha in lora_alphas:
            
            lora_config = LoraConfig(
                task_type=TaskType.SEQ_CLS, 
                r=lora_rank, 
                lora_alpha=lora_alpha, 
                lora_dropout=0.05, 
                bias="none",
                target_modules=[
                "q_proj",  "v_proj"
                ],
            )

            llama_model = get_peft_model(pretrain_model, lora_config)
            llama_model.print_trainable_parameters()

            training_args = TrainingArguments(
                output_dir=f"./results/lr={lr}_lora_alpha={lora_alpha}_lora_r={lora_rank}",
                learning_rate=lr,
                lr_scheduler_type= "constant",
                warmup_ratio= 0.1,
                max_grad_norm= 0.3,
                per_device_train_batch_size=batch_size,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=num_epochs,
                weight_decay=0.001,
                eval_strategy="epoch",
                save_strategy="epoch",
                metric_for_best_model="eval_f1-score",  # 使用准确率选择最佳模型
                greater_is_better=True,       # 指标越大越好
                save_total_limit=3, 
                load_best_model_at_end=True,
                report_to="wandb",
                fp16=True,
                gradient_checkpointing=True,
            )

            trainer = Trainer(
                model=llama_model,
                args=training_args,
                train_dataset=tokenized_data['train'],
                eval_dataset=tokenized_data['val'],
                tokenizer=llama_tokenizer,
                compute_metrics=compute_metrics,
                data_collator=data_collator
            )

            # 开始训练
            print(f"------lr={lr}_lora_rank={lora_rank}_lora_alpha={lora_alpha}")
            trainer.train()     
            trainer.save_model(f"./results/lr={lr}_lora_alpha={lora_alpha}_lora_r={lora_rank}/best_model")
            



  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


------lr=0.0002_lora_rank=4_lora_alpha=16


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mzhenyiq[0m ([33mzhenyiq-federal-student-aid[0m). Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.075364,0.555704,0.524691,0.528514,0.524691
2,No log,1.1054,0.526381,0.509259,0.493931,0.509259
3,1.147400,1.018396,0.592192,0.57716,0.571123,0.57716
4,1.147400,1.054032,0.571631,0.580247,0.550494,0.580247
5,0.836100,1.080845,0.580026,0.540123,0.544458,0.540123


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=4_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.6805,0.706694,0.691358,0.692014,0.691358
2,No log,0.742588,0.692425,0.694444,0.684449,0.694444
3,0.826700,1.549604,0.711502,0.682099,0.638205,0.682099
4,0.826700,1.426095,0.741077,0.722222,0.727291,0.722222
5,0.350400,2.194007,0.709478,0.685185,0.689436,0.685185


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=4_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.707823,0.680666,0.675926,0.666134,0.675926
2,No log,0.6911,0.717917,0.722222,0.710117,0.722222
3,0.845700,1.009572,0.722106,0.703704,0.707999,0.703704
4,0.845700,1.56541,0.758497,0.70679,0.710581,0.70679
5,0.379500,2.375787,0.718079,0.722222,0.717058,0.722222


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=8_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.810529,0.710457,0.62963,0.621371,0.62963
2,No log,0.684837,0.722484,0.728395,0.721649,0.728395
3,0.858400,0.877492,0.71106,0.712963,0.709853,0.712963
4,0.858400,1.622272,0.714459,0.697531,0.702068,0.697531
5,0.344600,2.532599,0.745673,0.709877,0.715225,0.709877


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=8_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.694793,0.687155,0.691358,0.670069,0.691358
2,No log,0.67819,0.699342,0.700617,0.699026,0.700617
3,0.829600,1.006955,0.699969,0.669753,0.660223,0.669753
4,0.829600,1.644518,0.726776,0.679012,0.683386,0.679012
5,0.393900,3.590572,0.685996,0.688272,0.670332,0.688272


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=8_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.679789,0.719373,0.709877,0.708179,0.709877
2,No log,0.754878,0.717065,0.719136,0.704417,0.719136
3,0.854700,0.943604,0.733488,0.734568,0.728982,0.734568
4,0.854700,1.456887,0.719904,0.722222,0.71503,0.722222
5,0.358300,2.279661,0.754372,0.746914,0.748049,0.746914


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909
------lr=0.0002_lora_rank=16_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.711446,0.682151,0.66358,0.668141,0.66358
2,No log,0.732627,0.700266,0.700617,0.686973,0.700617
3,0.870600,0.820301,0.745991,0.740741,0.741896,0.740741
4,0.870600,1.460856,0.732254,0.737654,0.733458,0.737654
5,0.342700,2.555985,0.734775,0.694444,0.69944,0.694444


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909
------lr=0.0002_lora_rank=16_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.793296,0.731357,0.617284,0.609974,0.617284
2,No log,0.691889,0.707466,0.712963,0.703973,0.712963
3,0.855700,0.833375,0.7043,0.703704,0.701661,0.703704
4,0.855700,1.396222,0.730311,0.728395,0.725257,0.728395
5,0.368100,2.4704,0.734076,0.728395,0.729977,0.728395


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=16_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.749564,0.700079,0.666667,0.669616,0.666667
2,No log,0.720072,0.709793,0.709877,0.701594,0.709877
3,0.827100,1.137385,0.718919,0.70679,0.705334,0.70679
4,0.827100,1.461274,0.705,0.700617,0.687321,0.700617
5,0.409500,2.231652,0.721455,0.703704,0.703184,0.703704


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=32_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.922341,0.714936,0.58642,0.557358,0.58642
2,No log,0.798996,0.685351,0.679012,0.668178,0.679012
3,0.873400,0.805548,0.738288,0.740741,0.735586,0.740741
4,0.873400,1.921062,0.678562,0.679012,0.671448,0.679012
5,0.371100,2.1453,0.724787,0.716049,0.718844,0.716049


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=0.0002_lora_rank=32_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.692109,0.714229,0.688272,0.692578,0.688272
2,No log,0.722416,0.726504,0.719136,0.699232,0.719136
3,0.862100,0.855889,0.685421,0.685185,0.673143,0.685185
4,0.862100,1.477592,0.711994,0.712963,0.712407,0.712963
5,0.377500,2.354827,0.715561,0.719136,0.708225,0.719136


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815
------lr=0.0002_lora_rank=32_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,0.705581,0.670058,0.66358,0.663521,0.66358
2,No log,0.731602,0.720495,0.722222,0.711346,0.722222
3,0.832600,1.102251,0.735397,0.731481,0.725479,0.731481
4,0.832600,1.310714,0.679652,0.682099,0.680277,0.682099
5,0.408100,1.803135,0.723849,0.716049,0.715787,0.716049


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=4_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.310628,0.397986,0.398148,0.394981,0.398148
2,No log,1.111999,0.469449,0.469136,0.462478,0.469136
3,1.398400,1.020452,0.479264,0.484568,0.475617,0.484568
4,1.398400,0.945334,0.568066,0.567901,0.565544,0.567901
5,0.917500,0.894878,0.591609,0.601852,0.591181,0.601852


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=4_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.251556,0.410931,0.410494,0.406657,0.410494
2,No log,1.055866,0.503782,0.506173,0.497746,0.506173
3,1.339300,0.961997,0.542589,0.546296,0.541801,0.546296
4,1.339300,0.903534,0.591718,0.583333,0.585409,0.583333
5,0.810400,0.89492,0.583241,0.592593,0.585421,0.592593


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 1,716,224 || all params: 7,506,653,184 || trainable%: 0.0229


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=4_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.172707,0.42561,0.429012,0.422369,0.429012
2,No log,1.000515,0.519952,0.530864,0.518855,0.530864
3,1.269500,0.906774,0.591296,0.592593,0.591278,0.592593
4,1.269500,0.905098,0.616928,0.601852,0.605904,0.601852
5,0.677600,0.98138,0.57917,0.580247,0.579478,0.580247


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=8_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.310875,0.404879,0.404321,0.401358,0.404321
2,No log,1.112576,0.476382,0.475309,0.468924,0.475309
3,1.399500,1.021533,0.490724,0.493827,0.485604,0.493827
4,1.399500,0.945534,0.562494,0.561728,0.559526,0.561728
5,0.920300,0.896957,0.580077,0.589506,0.580737,0.589506


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=8_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.251884,0.421858,0.419753,0.416364,0.419753
2,No log,1.055422,0.498989,0.503086,0.494061,0.503086
3,1.339600,0.961246,0.532901,0.537037,0.531877,0.537037
4,1.339600,0.898302,0.614879,0.608025,0.609389,0.608025
5,0.814100,0.882118,0.570639,0.57716,0.57302,0.57716


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 3,420,160 || all params: 7,508,357,120 || trainable%: 0.0456


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=8_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.171138,0.437584,0.438272,0.432941,0.438272
2,No log,0.998037,0.512732,0.521605,0.51183,0.521605
3,1.265500,0.905824,0.589049,0.589506,0.588396,0.589506
4,1.265500,0.900842,0.629291,0.608025,0.613766,0.608025
5,0.679300,1.000629,0.578249,0.570988,0.571333,0.570988


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=16_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.312329,0.404612,0.404321,0.401428,0.404321
2,No log,1.113903,0.468212,0.469136,0.461565,0.469136
3,1.401200,1.023412,0.477296,0.481481,0.472838,0.481481
4,1.401200,0.946236,0.568066,0.567901,0.565544,0.567901
5,0.921500,0.896247,0.583959,0.592593,0.584831,0.592593


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=16_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.253615,0.414613,0.41358,0.410094,0.41358
2,No log,1.056614,0.498346,0.5,0.491119,0.5
3,1.341500,0.962858,0.53241,0.537037,0.531681,0.537037
4,1.341500,0.899565,0.597154,0.58642,0.589077,0.58642
5,0.815300,0.887769,0.591261,0.595679,0.592004,0.595679


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 6,828,032 || all params: 7,511,764,992 || trainable%: 0.0909


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=16_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.172342,0.439005,0.438272,0.433895,0.438272
2,No log,0.996982,0.513687,0.524691,0.511892,0.524691
3,1.267600,0.906537,0.58806,0.589506,0.588052,0.589506
4,1.267600,0.902049,0.609216,0.592593,0.597432,0.592593
5,0.679200,0.993027,0.589995,0.58642,0.585392,0.58642


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=32_lora_alpha=16


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.313351,0.404612,0.404321,0.401428,0.404321
2,No log,1.114585,0.476365,0.475309,0.469286,0.475309
3,1.402000,1.02298,0.484978,0.487654,0.479458,0.487654
4,1.402000,0.946678,0.574165,0.574074,0.571359,0.574074
5,0.923600,0.897252,0.577763,0.58642,0.578259,0.58642


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=32_lora_alpha=32


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.252874,0.420856,0.419753,0.415932,0.419753
2,No log,1.054062,0.504806,0.506173,0.497736,0.506173
3,1.341100,0.959935,0.54591,0.549383,0.545171,0.549383
4,1.341100,0.895273,0.593008,0.583333,0.585832,0.583333
5,0.816100,0.880899,0.578599,0.580247,0.579075,0.580247


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


trainable params: 13,643,776 || all params: 7,518,580,736 || trainable%: 0.1815


  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


------lr=1e-05_lora_rank=32_lora_alpha=64


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
1,No log,1.17084,0.441768,0.441358,0.43574,0.441358
2,No log,0.994812,0.515999,0.524691,0.513625,0.524691
3,1.266100,0.905297,0.589179,0.589506,0.588464,0.589506
4,1.266100,0.899075,0.615679,0.595679,0.600722,0.595679
5,0.680800,0.982275,0.58982,0.583333,0.583537,0.583333


  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
