In [1]:
import sys
sys.path.append("../../../../")
sys.path.append("../../")

In [2]:
!pip install -q --upgrade bitsandbytes==0.48.2 trl==0.25.1 peft

In [3]:
import os
import re
import torch
from torch import nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math 
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer , TrainingArguments , set_seed , BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTTrainer , SFTConfig
from datetime import datetime

In [4]:
BASE_MODEL = "meta-llama/Llama-3.2-3B"
PROJECT_NAME = "latest-second-price-p"
USERNAME="ujalaarshad17"
DATA_USER = "ed-donner"
DATASET_NAME = f"{DATA_USER}/items_prompts_lite"
RUN_NAME =  f"{datetime.now():%Y-%m-%d_%H.%M.%S}"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
HUB_MODEL_NAME = f"{USERNAME}/{PROJECT_RUN_NAME}"

In [5]:
from dotenv import load_dotenv

In [6]:
from dotenv import load_dotenv
load_dotenv(override = True)

True

In [7]:
EPOCHS = 4
BATCH_SIZE = 128
MAX_SEQUENCE_LENGTH = 148
GRADIENT_ACCUMULATION_STEPS = 1

# QLORA 
QUANT_4_BIT = False
LORA_R = 64
LORA_ALPHA = LORA_R * 2
ATTENTION_LAYERS = ["q_proj", "v_proj", "k_proj", "o_proj"]
TARGET_MODULES = ATTENTION_LAYERS 
LORA_DROPOUT = 0.2

# Hyperparameters
LEARNING_RATE = 5e-4
WARMUP_RATIO = 0.05
LR_SCHEDULER_TYPE = 'cosine'
WEIGHT_DECAY = 0.001
OPTIMIZER = "paged_adamw_32bit"

capability = torch.cuda.get_device_capability()
use_bf16 = capability[0] >= 8

In [8]:
use_bf16

True

In [9]:
VAL_SIZE = 500 
LOG_STEPS = 5 
SAVE_STEPS = 100 
LOG_TO_WANDB = True

In [10]:
from huggingface_hub import login

In [11]:
hf_token = os.getenv("HF_TOKEN")
login(hf_token, add_to_git_credential=True)

Token has not been saved to git credential helper.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [12]:
!pip install -q wandb

In [13]:
import wandb
wandb_api_key = os.getenv('WANDB_API_KEY')
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

os.environ["WANDB_PROJECT"] = PROJECT_NAME
os.environ["WANDB_LOG_MODEL"] = "false"
os.environ["WANDB_WATCH"] = "false"

[34m[1mwandb[0m: Currently logged in as: [33mawaistahseen009[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [14]:
dataset = load_dataset(DATASET_NAME)
train = dataset['train']
val = dataset['val'].select(range(VAL_SIZE))
test = dataset['test']

In [15]:
if LOG_TO_WANDB:
  wandb.init(project=PROJECT_NAME, name=RUN_NAME)

In [16]:
if QUANT_4_BIT:
  quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16 if use_bf16 else torch.float16,
    bnb_4bit_quant_type="nf4"
  )
else:
  quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    bnb_8bit_compute_dtype=torch.bfloat16 if use_bf16 else torch.float16,
  )

In [17]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=quant_config,
    device_map="auto",
)
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

print(f"Memory footprint: {base_model.get_memory_footprint() / 1e9:.1f} GB")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Memory footprint: 3.6 GB


In [18]:
lora_parameters = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
)

In [19]:
train_parameters = SFTConfig(
    output_dir=PROJECT_RUN_NAME,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim=OPTIMIZER,
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    logging_steps=LOG_STEPS,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    fp16=not use_bf16,
    bf16=use_bf16,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=WARMUP_RATIO,
    group_by_length=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    report_to="wandb" if LOG_TO_WANDB else None,
    run_name=RUN_NAME,
    max_length=MAX_SEQUENCE_LENGTH,
    save_strategy="steps",
    hub_strategy="every_save",
    push_to_hub=True,
    hub_model_id=HUB_MODEL_NAME,
    hub_private_repo=True,
    eval_strategy="steps",
    eval_steps=SAVE_STEPS
)

In [20]:
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=train,
    eval_dataset=val,
    peft_config=lora_parameters,
    args=train_parameters
)

In [21]:
fine_tuning.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': None}.
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
100,1.2816,1.288795,2.832609,1345782.0,0.7605
200,1.2834,1.2937,3.190707,2641272.0,0.7615
300,1.1979,1.272192,3.018592,3970553.0,0.764
400,1.1822,1.267577,2.868697,5276235.0,0.7645
500,1.0154,1.413731,2.768819,6585497.0,0.7605
600,1.0014,1.418963,2.722488,7908104.0,0.761


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=628, training_loss=1.1866882386480926, metrics={'train_runtime': 1891.1495, 'train_samples_per_second': 42.302, 'train_steps_per_second': 0.332, 'total_flos': 1.4230161709478707e+17, 'train_loss': 1.1866882386480926, 'entropy': 2.8794817129770913, 'num_tokens': 8243892.0, 'mean_token_accuracy': 0.775390625, 'epoch': 4.0})

In [22]:
fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True)
print(f"Saved to the hub: {PROJECT_RUN_NAME}")

README.md:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

No files have been modified since last commit. Skipping to prevent empty commit.


Saved to the hub: latest-second-price-p-2025-12-16_05.43.13


In [23]:
if LOG_TO_WANDB:
  wandb.finish()

0,1
eval/entropy,▃█▅▃▂▁
eval/loss,▂▂▁▁██
eval/mean_token_accuracy,▁▃▇█▁▂
eval/num_tokens,▁▂▄▅▇█
eval/runtime,█▇▆▄▁▃
eval/samples_per_second,▁▂▃▅█▆
eval/steps_per_second,▁▂▃▅█▆
train/entropy,▃▁▅▆▅▆▇█▅▇▇▇▅▆▆▆▆▇▇▇▆▇▅▅▆▆▆▇▅▅▆▆▅▆▆▅▅▅▅▆
train/epoch,▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███████

0,1
eval/entropy,2.72249
eval/loss,1.41896
eval/mean_token_accuracy,0.761
eval/num_tokens,7908104.0
eval/runtime,60.6457
eval/samples_per_second,8.245
eval/steps_per_second,8.245
total_flos,1.4230161709478707e+17
train/entropy,2.87948
train/epoch,4
