## Importing Modules

In [1]:
import os
import pathlib
import torch

from datasets import load_dataset
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from trl import SFTTrainer

from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModel,
    AutoModelForSequenceClassification,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
    pipeline,
    TextDataset,
    EvalPrediction,
    DataCollatorWithPadding,
    GenerationConfig,
    BitsAndBytesConfig
)

from peft import (
    LoraConfig,
    PeftModelForSequenceClassification,
    TaskType,
    AutoPeftModelForSequenceClassification,
    get_peft_model
)

if torch.cuda.is_available():
    print("GPU is available!")
else:
    print("GPU is not available.")

  from .autonotebook import tqdm as notebook_tqdm



GPU is available!


## Set Up Directories

In [2]:
MAIN_PATH = str(pathlib.Path().resolve())
DATASET_PATH = MAIN_PATH + '\\datasets'
MODEL_PATH = MAIN_PATH + '\\models'
SAVE_PATH = MAIN_PATH + '\\fine_tuned_models'
MODELS = 'D:\\AI\\LLM\\models'

## Load Model

In [3]:
models = os.listdir(MODELS)
models

['config.yaml',
 'gemma-2-2b',
 'Llama-3-8B-GPTQ-4-Bit.safetensors',
 'Llama-3-8B-Instruct-GPTQ-4-Bit.safetensors',
 'Llama-3.2-11B-Vision-Instruct-bnb-4bit',
 'Llama-3.2-1B-Instruct',
 'Llama-3.2-3B-Instruct',
 'Meta-Llama-3.1-8B-Instruct-GPTQ-INT4',
 'Phi-3-mini-128k-instruct',
 'Phi-3-mini-128k-instruct-onnx',
 'Phi-3-mini-4k-instruct-q4.gguf',
 'place-your-models-here.txt',
 'Qwen2.5-0.5B',
 'Qwen2.5-1.5B',
 'Qwen2.5-3B',
 'Qwen2.5-7B-Instruct-GPTQ-Int4']

In [4]:
model_path = MODELS + '\\' + models[12]
model_path

'D:\\AI\\LLM\\models\\Qwen2.5-0.5B'

In [5]:
torch.cuda.empty_cache()

In [6]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config = bnb_config,
    trust_remote_code = True
)
model.config.use_cache = False

`low_cpu_mem_usage` was None, now set to True since model is quantized.


## Load Tokenizer

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code = True)
tokenizer.pad_token = tokenizer.eos_token

## Import Dataset

In [8]:
dataset_name = 'Ali-C137/Arabic_guanaco_oasst1'

dataset = load_dataset(dataset_name, split = 'train')
dataset.to_pandas().head()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 9846/9846 [00:00<00:00, 184998.89 examples/s]
Generating test split: 100%|██████████| 518/518 [00:00<00:00, 112345.49 examples/s]


Unnamed: 0,text
0,### الإنسان: هل يمكنك كتابة مقدمة قصيرة حول أه...
1,### الإنسان: ما هي مراحل التطور وماذا تتكون وف...
2,### الإنسان: هل يمكنك شرح التعلم التباين في ال...
3,### الإنسان: أريد أن أبدأ في التصوير الفوتوغرا...
4,### الإنسان: طريقة Biclassical Perceptron: تعر...


In [9]:
dataset

Dataset({
    features: ['text'],
    num_rows: 9846
})

## Set Up PEFT

In [10]:
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    r = lora_r,
    bias = 'none',
    task_type = 'CAUSAL_LM',
)

## Set Up Training

In [11]:
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = 'paged_adamw_32bit'
save_steps = 100
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 100
warmup_ratio = 0.03
lr_scheduler_type = 'constant'

training_arguments = TrainingArguments(
    output_dir = SAVE_PATH,
    per_device_train_batch_size = per_device_train_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    optim = optim,
    save_steps = save_steps,
    logging_steps = logging_steps,
    learning_rate = learning_rate,
    fp16 = True,
    max_grad_norm = max_grad_norm,
    max_steps = max_steps,
    warmup_ratio = warmup_ratio,
    group_by_length = True,
    lr_scheduler_type = lr_scheduler_type,
)

In [12]:
max_seq_length = 512

trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    peft_config = peft_config,
    dataset_text_field = 'text',
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = training_arguments,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
Map: 100%|██████████| 9846/9846 [00:02<00:00, 3675.56 examples/s]
max_steps is given, it will override any value given in num_train_epochs


In [13]:
for name, module in trainer.model.named_modules():
    if 'norm' in name:
        module = module.to(torch.float32)

In [17]:
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 319444864
Trainable parameters : 4325376
Trainable percentage: 1.35%


In [14]:
trainer.train()

  attn_output = torch.nn.functional.scaled_dot_product_attention(
 10%|█         | 10/100 [01:19<11:28,  7.65s/it]

{'loss': 2.4915, 'grad_norm': 0.14193467795848846, 'learning_rate': 0.0002, 'epoch': 0.02}


 20%|██        | 20/100 [02:48<13:29, 10.12s/it]

{'loss': 2.4875, 'grad_norm': 0.17195671796798706, 'learning_rate': 0.0002, 'epoch': 0.03}


 30%|███       | 30/100 [04:16<09:44,  8.34s/it]

{'loss': 2.5491, 'grad_norm': 0.2242295742034912, 'learning_rate': 0.0002, 'epoch': 0.05}


 40%|████      | 40/100 [04:45<01:59,  2.00s/it]

{'loss': 2.7021, 'grad_norm': 0.30632200837135315, 'learning_rate': 0.0002, 'epoch': 0.06}


 50%|█████     | 50/100 [04:57<00:52,  1.05s/it]

{'loss': 2.839, 'grad_norm': 1.2592346668243408, 'learning_rate': 0.0002, 'epoch': 0.08}


 60%|██████    | 60/100 [06:12<04:50,  7.27s/it]

{'loss': 2.4626, 'grad_norm': 0.20777428150177002, 'learning_rate': 0.0002, 'epoch': 0.1}


 70%|███████   | 70/100 [07:38<04:55,  9.85s/it]

{'loss': 2.4001, 'grad_norm': 0.16684085130691528, 'learning_rate': 0.0002, 'epoch': 0.11}


 80%|████████  | 80/100 [09:09<02:56,  8.83s/it]

{'loss': 2.4816, 'grad_norm': 0.21610809862613678, 'learning_rate': 0.0002, 'epoch': 0.13}


 90%|█████████ | 90/100 [09:44<00:21,  2.11s/it]

{'loss': 2.6349, 'grad_norm': 0.3142939507961273, 'learning_rate': 0.0002, 'epoch': 0.15}


100%|██████████| 100/100 [09:56<00:00,  1.09s/it]

{'loss': 2.7545, 'grad_norm': 1.3272628784179688, 'learning_rate': 0.0002, 'epoch': 0.16}


100%|██████████| 100/100 [09:57<00:00,  5.97s/it]

{'train_runtime': 597.3421, 'train_samples_per_second': 2.679, 'train_steps_per_second': 0.167, 'train_loss': 2.5802895164489748, 'epoch': 0.16}





TrainOutput(global_step=100, training_loss=2.5802895164489748, metrics={'train_runtime': 597.3421, 'train_samples_per_second': 2.679, 'train_steps_per_second': 0.167, 'total_flos': 1194442093937664.0, 'train_loss': 2.5802895164489748, 'epoch': 0.16246953696181965})

In [15]:
save_model = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model
#save_model.save_pretrained(SAVE_PATH)