In [1]:
# Show the GPU, driver and memory available to this runtime before training
!nvidia-smi

Wed Oct 22 00:05:00 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.95.05              Driver Version: 580.95.05      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  |   00000000:32:00.0 Off |                    0 |
| N/A   31C    P0             46W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
import os
import re
import math
from tqdm import tqdm
#from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, set_seed, BitsAndBytesConfig
from datasets import load_dataset, Dataset, DatasetDict
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime
import matplotlib.pyplot as plt

# Load variables from a .env file into the environment so that HF_TOKEN and
# WANDB_API_KEY can be read with os.getenv in the cells below.
from dotenv import load_dotenv
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [3]:
# Authenticate with the Hugging Face hub using the token from the environment
hf_token = os.environ.get('HF_TOKEN')
login(token=hf_token, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [4]:
# --- Model and project identifiers ---
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "llama3.1_8B_pricer"
HF_USER = "franzyellow"

# --- Dataset ---
DATASET_NAME = HF_USER + "/pricer-data"
MAX_SEQUENCE_LENGTH = 182

# --- Run naming ---
# A timestamp makes each run (and the hub repo it is saved to) unique,
# which keeps model versions easy to tell apart.
RUN_NAME = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
PROJECT_RUN_NAME = "-".join([PROJECT_NAME, RUN_NAME])
HUB_MODEL_NAME = "/".join([HF_USER, PROJECT_RUN_NAME])

# --- QLoRA hyperparameters ---
LORA_R = 32  # adapter rank; can be lowered to 8 when resources are limited
LORA_ALPHA = 2 * LORA_R  # scaling factor, set to twice the rank (64)
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]
LORA_DROPOUT = 0.1
QUANT_4_BIT = True  # True -> 4-bit quantization, False -> 8-bit

# --- Training hyperparameters ---
EPOCHS = 1  # a single epoch is enough here; more is probably overkill
BATCH_SIZE = 12  # on an A100 box this can go up to 16
GRADIENT_ACCUMULATION_STEPS = 1  # 1 means no accumulation across batches
LEARNING_RATE = 1e-4
LR_SCHEDULER_TYPE = 'cosine'  # gradually lowers the LR along a cosine curve as training progresses
WARMUP_RATIO = 0.03  # fraction of training spent ramping the LR up before the cosine decay takes over
OPTIMIZER = "paged_adamw_32bit"  # https://huggingface.co/docs/transformers/main/en/perf_train_gpu_one#optimizer-choice

# --- Admin config ---
# SAVE_STEPS controls how often a checkpoint is saved and uploaded to the hub;
# it was lowered from 5000 to 2000 to get more frequent saves.
STEPS = 50  # logging (W&B update) frequency, in steps
SAVE_STEPS = 2000  # checkpoint saving frequency, in steps
LOG_TO_WANDB = True

# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [5]:
# Log in to Weights & Biases.
# The login is skipped when LOG_TO_WANDB is False, matching the guarded
# wandb.init / wandb.finish calls, so a run with logging disabled never
# waits on a login prompt.
wandb_api_key = os.getenv('WANDB_API_KEY')
if LOG_TO_WANDB:
  # Pass the key read above explicitly; with key=None wandb falls back to
  # its own credential lookup.
  wandb.login(key=wandb_api_key)

# Configure Weights & Biases to record against our project
os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "checkpoint" if LOG_TO_WANDB else "end"
os.environ["WANDB_WATCH"] = "gradients"

[34m[1mwandb[0m: Currently logged in as: [33mfranzhuang027[0m ([33mfranzhuang027-university-of-amsterdam[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Loading data

In [6]:
# Fetch the dataset from the Hugging Face hub, split it, and peek at one example
dataset = load_dataset(DATASET_NAME)
train, test = dataset['train'], dataset['test']
train[0]

{'text': 'How much does this cost to the nearest dollar?\n\nDelphi FG0166 Fuel Pump Module\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7\n\nPrice is $227.00',
 'price': 226.95}

In [7]:
# Start the W&B run for this training session (skipped when logging is disabled)
if LOG_TO_WANDB:
  wandb.init(project=PROJECT_NAME, name=RUN_NAME)

### Loading the model

In [8]:
# Build the bitsandbytes quantization config used when loading the base model.

if QUANT_4_BIT:
  # 4-bit NF4 weights with double quantization; compute runs in bfloat16
  quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
  )
else:
  # 8-bit path. BitsAndBytesConfig has no `bnb_8bit_compute_dtype` option
  # (it was discarded as an unused kwarg), so only load_in_8bit is passed.
  quant_config = BitsAndBytesConfig(load_in_8bit=True)

In [9]:
# Load the tokenizer, then the quantized base model

# Reuse the EOS token for padding and pad on the right-hand side
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the chosen quantization config
model_load_kwargs = {"quantization_config": quant_config, "device_map": "auto"}
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **model_load_kwargs)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# Report the memory taken by the loaded model, in MB
footprint_mb = base_model.get_memory_footprint() / 1e6
print(f"Memory footprint: {footprint_mb:.1f} MB")

Loading checkpoint shards: 100%|██████████| 4/4 [00:12<00:00,  3.05s/it]


Memory footprint: 5591.5 MB


## Data Collator

During training, it's important that we do not train the model to predict the product descriptions; it should only learn to predict their price.

We need to tell the trainer that everything up to "Price is $" is there to give context to the model to predict the next token, but does not need to be learned.

The trainer needs to teach the model to predict the token(s) after "Price is $".

This can be done manually by building label masks, but Hugging Face's TRL library provides a simple helper class, `DataCollatorForCompletionOnlyLM`, that takes care of it for us.

In [10]:
from trl import DataCollatorForCompletionOnlyLM

# Text that marks where the prediction target starts
response_template = "Price is $"

# The collator builds the intended mask behind the scenes
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

### Training Config

A LoraConfig object with our hyperparameters for LoRA

An SFTConfig with our overall Training parameters

In [11]:
# LoRA adapter configuration, built from the QLoRA hyperparameters defined earlier

lora_parameters = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    bias="none",
    task_type="CAUSAL_LM",
)

# Next, specify the general configuration parameters for training

train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    eval_strategy="no", # no evaluation during training; "steps" or "epoch" would evaluate periodically
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    logging_steps=STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=False,
    bf16=True,
    max_grad_norm=0.3,
    max_steps=-1, # -1 leaves the run length to num_train_epochs
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    # "none" explicitly disables reporting; None would fall back to the
    # library default, which can still enable wandb when it is installed.
    report_to="wandb" if LOG_TO_WANDB else "none",
    run_name=RUN_NAME,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    dataset_text_field="text",
    save_strategy="steps",
    hub_strategy="every_save", # push to the hub each time a checkpoint is saved
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True
)

# Build the Supervised Fine-Tuning trainer from the LoRA and training configs.
# Recent trl versions show a warning about labels at this point; it can be
# ignored, but do check that the training loss actually comes down.

fine_tuning = SFTTrainer(
    model=base_model,
    args=train_parameters,
    peft_config=lora_parameters,
    train_dataset=train,
    data_collator=collator,
)

Map: 100%|██████████| 400000/400000 [00:39<00:00, 10150.22 examples/s]


In [12]:
# Run the fine-tuning loop
fine_tuning.train()

# Upload the fine-tuned model to a private Hugging Face repo
trained_model = fine_tuning.model
trained_model.push_to_hub(PROJECT_RUN_NAME, private=True)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 128001}.


Step,Training Loss
50,2.7472
100,2.0183
150,1.9202
200,1.9029
250,1.9025
300,1.916
350,1.9151
400,1.8935
450,1.8738
500,1.8846


[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-2000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-4000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-6000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-8000)... Done. 1.2s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-10000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-12000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-14000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (llama3.1_8B_pricer-2025-10-22_00.05.08/checkpoint-16000)... Done. 1.3s
[34m[1mwandb[0m: Adding directory

Saved to the hub: llama3.1_8B_pricer-2025-10-22_00.05.08


In [13]:
# Close the W&B run that was started before training (only if logging was enabled)
if LOG_TO_WANDB:
  wandb.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇███
train/global_step,▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇██
train/grad_norm,▆▂▂▅▃▁▃▂▂▄▄▄▄▃▃▂▁▄▄▄▄▃▃▄▃▅▆▄▅▄▆▅▆▃▄▃█▅▄█
train/learning_rate,▂▃▄█████▇▇▇▇▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▁▁
train/loss,█▇▇▇▆▆▆▆▅▅▄▅▅▅▄▅▄▄▃▄▃▄▂▄▃▃▂▂▂▂▂▃▂▂▂▂▁▂▃▂

0,1
total_flos,3.2137536831720653e+18
train/epoch,1.0
train/global_step,33334.0
train/grad_norm,4.2268
train/learning_rate,0.0
train/loss,1.6946
train_loss,1.75948
train_runtime,23168.6086
train_samples_per_second,17.265
train_steps_per_second,1.439
