In [1]:
import numpy as np 
import pandas as pd 
import os
import torch
device ='cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
!pip install transformers accelerate torch huggingface_hub peft

Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.13.2


In [3]:
from huggingface_hub import login
login("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")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [5]:
from datasets import load_dataset

data = load_dataset("openai/gsm8k", "main")
data

README.md:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 7473
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 1319
    })
})

In [6]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return (f"Trainable model parameters: {trainable_model_params}, All model parameters: {all_model_params},percentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%")


print(print_number_of_trainable_model_parameters(model))

Trainable model parameters: 1235814400, All model parameters: 1235814400,percentage of trainable model parameters: 100.00%


In [7]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(r=32,lora_alpha = 64, target_modules=["self_attn.q_proj", "self_attn.v_proj"],
                         lora_dropout = 0.2, bias ="none")
peft_model_train = get_peft_model(model, lora_config)
print_number_of_trainable_model_parameters(peft_model_train)

'Trainable model parameters: 3407872, All model parameters: 1239222272,percentage of trainable model parameters: 0.28%'

In [8]:
for name, module in model.named_modules():
    print(name)


model
model.embed_tokens
model.layers
model.layers.0
model.layers.0.self_attn
model.layers.0.self_attn.q_proj
model.layers.0.self_attn.q_proj.base_layer
model.layers.0.self_attn.q_proj.lora_dropout
model.layers.0.self_attn.q_proj.lora_dropout.default
model.layers.0.self_attn.q_proj.lora_A
model.layers.0.self_attn.q_proj.lora_A.default
model.layers.0.self_attn.q_proj.lora_B
model.layers.0.self_attn.q_proj.lora_B.default
model.layers.0.self_attn.q_proj.lora_embedding_A
model.layers.0.self_attn.q_proj.lora_embedding_B
model.layers.0.self_attn.q_proj.lora_magnitude_vector
model.layers.0.self_attn.k_proj
model.layers.0.self_attn.v_proj
model.layers.0.self_attn.v_proj.base_layer
model.layers.0.self_attn.v_proj.lora_dropout
model.layers.0.self_attn.v_proj.lora_dropout.default
model.layers.0.self_attn.v_proj.lora_A
model.layers.0.self_attn.v_proj.lora_A.default
model.layers.0.self_attn.v_proj.lora_B
model.layers.0.self_attn.v_proj.lora_B.default
model.layers.0.self_attn.v_proj.lora_embedding_

In [9]:
def preprocess_function(examples):
    inputs = [q + " " + a for q, a in zip(examples['question'], examples['answer'])]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = model_inputs["input_ids"].copy()
    
    return model_inputs
tokenized_datasets = data.map(preprocess_function, batched=True)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [10]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="/kaggle/working/results",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir="/kaggle/working/logs",
)

trainer = Trainer(
    model=peft_model_train,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
)

trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114338622222577, max=1.0…

Step,Training Loss


KeyboardInterrupt: 

In [14]:
peft_model_path="/kaggle/working/pre"

trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

('/kaggle/working/pre/tokenizer_config.json',
 '/kaggle/working/pre/special_tokens_map.json',
 '/kaggle/working/pre/tokenizer.json')