In [None]:
import sys
from dotenv import load_dotenv
import os

# Load variables from a .env file (if one is found) into the process
# environment so PROJECT_ROOT can be configured per machine.
load_dotenv()

PROJECT_ROOT = os.getenv("PROJECT_ROOT")
if PROJECT_ROOT is None:
    # Without this guard, None would be appended to sys.path and the first
    # os.path.join below would fail with an opaque TypeError.
    raise RuntimeError(
        "PROJECT_ROOT is not set. Define it in the environment or in a .env "
        "file so the project's source directories can be added to sys.path."
    )

# Directories that must be importable from this notebook, in lookup order.
_project_paths = [
    PROJECT_ROOT,
    os.path.join(PROJECT_ROOT, "src"),
    os.path.join(PROJECT_ROOT, "src/scripts"),
    os.path.join(PROJECT_ROOT, "src/utils"),
]
for _path in _project_paths:
    # Skip entries that are already present so re-running this cell does not
    # keep growing sys.path with duplicates.
    if _path not in sys.path:
        sys.path.append(_path)

# Now import the module.
# NOTE(review): the later cells use names that are not imported anywhere else
# in this notebook (e.g. TrainingArguments, SFTTrainer, model, tokenizer,
# torch), so they presumably come from this wildcard import -- consider
# importing them explicitly to make their origin visible.
from model_lora_finetune import *

In [None]:
# Training configuration for the LoRA fine-tuning run: batch/optimizer
# hyperparameters, mixed-precision mode, and the step-based evaluation and
# checkpointing schedule.

# Query the precision capability once; exactly one of bf16/fp16 is enabled.
_bf16_supported = is_bfloat16_supported()

args = TrainingArguments(
    # --- Output / reporting ---
    output_dir=os.path.join(MODELS_PATH, "lora_finetuned_gemma-3-1b-it-4bit"),
    report_to="none",
    logging_steps=10,
    seed=3407,
    # --- Batch size and schedule length ---
    # 12 examples per device x 24 accumulation steps = 288 examples per
    # device for each optimizer update.
    per_device_train_batch_size=12,
    gradient_accumulation_steps=24,
    num_train_epochs=1,
    # --- Optimizer and learning-rate schedule ---
    optim="adamw_torch",
    learning_rate=2e-4,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    warmup_ratio=0.05,
    # --- Mixed precision ---
    bf16=_bf16_supported,
    fp16=not _bf16_supported,
    # --- Evaluation and checkpointing (both every 10 steps) ---
    eval_strategy="steps",
    eval_steps=10,
    save_strategy="steps",
    save_steps=10,
    save_total_limit=1,
    # Lower eval_loss is treated as better.
    # NOTE(review): load_best_model_at_end is not set here; confirm whether
    # the best checkpoint is meant to be reloaded once training finishes.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

In [None]:
# Build the supervised fine-tuning trainer from the model, tokenizer and
# train/validation datasets, using the TrainingArguments held in `args`.
#
# Gradient clipping is intentionally not passed here: max_grad_norm is a
# TrainingArguments field rather than an SFTTrainer constructor parameter, so
# passing it to the trainer only works when a wrapper forwards unknown kwargs
# and is rejected otherwise. TrainingArguments defaults max_grad_norm to 1.0,
# so clipping stays at 1.0 through `args`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    # Dataset column read as the training text.
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=True,
    args=args,
)

In [None]:
# Report which GPU is in use and how much memory is already reserved before
# training starts. Byte counts are converted to GiB (1024**3 bytes) and
# rounded to three decimals.
gpu_stats = torch.cuda.get_device_properties(0)  # CUDA device index 0
max_memory = round(gpu_stats.total_memory / 1024**3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [None]:
# Start the fine-tuning run. As the last expression in the cell, whatever
# train() returns is shown as the cell's output.
trainer.train()