<a href="https://colab.research.google.com/github/azzindani/03_LLM/blob/main/00_Qwen2_5_0_5B_Fine_Tune_PEFT_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 00 Import Modules

In [None]:
#!pip install --upgrade transformers
!pip install -q peft
!pip install -U -q bitsandbytes
!pip install -q datasets
!pip install -q trl

In [None]:
import os
import pathlib
import torch
import numpy as np

from datetime import datetime
from datasets import load_dataset
from datasets import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from trl import SFTTrainer

from transformers import (
  AutoTokenizer,
  AutoModelForCausalLM,
  AutoModelForSeq2SeqLM,
  AutoModel,
  AutoModelForSequenceClassification,
  DataCollatorForLanguageModeling,
  Trainer,
  TrainingArguments,
  pipeline,
  TextDataset,
  EvalPrediction,
  DataCollatorWithPadding,
  GenerationConfig,
  BitsAndBytesConfig,
  DataCollatorForSeq2Seq,
  TextStreamer
)

from peft import (
  LoraConfig,
  PeftModelForSequenceClassification,
  PeftModel,
  TaskType,
  AutoPeftModelForSequenceClassification,
  get_peft_model,
  prepare_model_for_kbit_training
)

# Tell the reader up front whether PyTorch can see a CUDA device.
print("GPU is available!" if torch.cuda.is_available() else "GPU is not available.")

GPU is available!


In [None]:
# Pick the compute device once: CUDA when PyTorch reports one, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## 01 Import Model

In [None]:
#url = 'https://huggingface.co/Qwen/Qwen2.5-0.5B'
#model_name = url.split('.co/')[-1]

model_name = 'Qwen/Qwen2.5-0.5B'

In [None]:
# 4-bit NF4 quantization with float16 compute and double quantization.
bnb_config = BitsAndBytesConfig(
  load_in_4bit = True,
  bnb_4bit_quant_type = 'nf4',
  bnb_4bit_compute_dtype = torch.float16,
  bnb_4bit_use_double_quant = True,
)

# Let `from_pretrained` place the quantized weights through `device_map` rather
# than calling `.to(device)` on the result: moving a bitsandbytes-quantized model
# with `.to()` is rejected by several transformers/bitsandbytes version
# combinations, and the weights are already placed at load time.
model = AutoModelForCausalLM.from_pretrained(
  model_name,
  quantization_config = bnb_config,
  device_map = 'auto',
  trust_remote_code = True
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/681 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]



In [None]:
'''model = AutoModelForCausalLM.from_pretrained(
  model_name,
  torch_dtype = torch.float16,
  trust_remote_code = True
).to(device) #'''

'model = AutoModelForCausalLM.from_pretrained(\n  model_name,\n  torch_dtype = torch.float16,\n  trust_remote_code = True\n).to(device) #'

In [None]:
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): Linear4bit(in_features=896, out_features=896, bias=True)
          (k_proj): Linear4bit(in_features=896, out_features=128, bias=True)
          (v_proj): Linear4bit(in_features=896, out_features=128, bias=True)
          (o_proj): Linear4bit(in_features=896, out_features=896, bias=False)
          (rotary_emb): Qwen2RotaryEmbedding()
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear4bit(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear4bit(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    

In [None]:
# Parameter census of the freshly loaded model: the overall element count and the
# share that currently has `requires_grad` set.
total_params = sum(weight.numel() for weight in model.parameters())
trainable_params = sum(
  weight.numel()
  for weight in filter(lambda w: w.requires_grad, model.parameters())
)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 315119488
Trainable parameters : 136178560
Trainable percentage: 43.21%


## 02 Import Tokenizer

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
#tokenizer

tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

## 03 Pre Test

In [None]:
def assistant(prompt):
  """Generate a reply to `prompt` with the base `model` and print it.

  The prompt is wrapped as a single user turn, rendered through the tokenizer's
  chat template (with the generation prompt appended) and passed to
  `model.generate`. The whole decoded sequence, special tokens included, is
  printed; the function returns None.
  """
  messages = [
    # The role has to be 'user'. With the role 'human' the rendered prompt
    # contained only the system turn, so the question never reached the model.
    {'role' : 'user', 'content' : prompt},
  ]
  # return_dict = True yields the attention mask alongside the input ids, so
  # `generate` does not have to guess it (pad and eos share the same token id).
  inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = 'pt',
    return_dict = True,
  ).to(model.device)  # follow the model's device instead of hard-coding 'cuda'
  generation_config = GenerationConfig(
    do_sample = True,
    top_k = 1,
    temperature = 0.1,
    max_new_tokens = 1024,
    pad_token_id = tokenizer.eos_token_id
  )
  outputs = model.generate(
    **inputs,
    generation_config = generation_config
  )
  # Special tokens are kept in the printed text so the rendered template is visible.
  print(tokenizer.decode(outputs[0]))

In [None]:
prompt = '''
Summarize the following legal text in a few sentences:

'In the case of John Doe v. XYZ Corp, the plaintiff, John Doe, entered into a formal service contract with XYZ Corp in January 2022. The agreement stipulated a one-year commitment for IT support services, with John Doe providing on-site troubleshooting, software updates, and system maintenance. XYZ Corp agreed to pay a fixed monthly retainer along with additional fees for after-hours support. However, in June 2022, XYZ Corp terminated the contract without prior notice, claiming that an unexpected downturn in business operations left them financially unable to continue. The plaintiff alleges wrongful termination, asserting that XYZ Corp failed to adhere to the 60-day notice clause outlined in the contract. Additionally, the plaintiff contends that the early termination damaged his professional reputation and resulted in significant financial losses, including missed client opportunities and incurred expenses for certifications specific to XYZ Corp’s systems. John Doe is seeking compensation for the remaining contract balance, damages for reputational harm, and reimbursement for training and certification costs required under the agreement.
'''
assistant(prompt)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?<|endoftext|>


In [None]:
prompt = "What are the legal implications if a party violates a confidentiality agreement in the context of contract law? For example, consider a scenario where a contractor working with Tech Innovators Inc. shares proprietary technology information with a competitor. Explain in detail and cite relevant case law where possible."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?<|endoftext|>


In [None]:
prompt = "Rephrase the following legal statement to make it more understandable for a general audience: 'Under the terms of the non-compete agreement, the defendant is barred from engaging in any business that competes with the plaintiff's business within a 50-mile radius for two years following the termination of employment.' Retain all key information while simplifying the language."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?<|endoftext|>


In [None]:
prompt = "In the case where a defendant claims breach of contract due to unforeseeable events, how does the principle of 'force majeure' apply? For instance, if a company was unable to deliver contracted goods due to a natural disaster, provide a detailed explanation and outline any relevant conditions under which the force majeure principle might or might not be applicable."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?<|endoftext|>


In [None]:
prompt = "Construct an argument in defense of a client accused of breaching intellectual property laws due to sharing copyrighted material in an educational setting. For example, a teacher shares portions of a textbook with students to support classroom discussion. Focus on any legal exceptions or defenses that may apply, such as the fair use doctrine in educational contexts."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?<|endoftext|>


## 04 Import Dataset

In [None]:
#url = 'https://huggingface.co/datasets/KingNish/reasoning-base-20k'
#dataset_name = url.split('datasets/')[-1]

dataset_name = 'mlabonne/FineTome-100k'

In [None]:
max_length = 1024

In [None]:
dataset = load_dataset(dataset_name, split = 'train')
dataset

Dataset({
    features: ['conversations', 'source', 'score'],
    num_rows: 100000
})

In [None]:
dataset.select(range(5)).to_pandas().head()

Unnamed: 0,conversations,source,score
0,"[{'from': 'human', 'value': 'Explain what bool...",infini-instruct-top-500k,5.212621
1,"[{'from': 'human', 'value': 'Explain how recur...",infini-instruct-top-500k,5.157649
2,"[{'from': 'human', 'value': 'Explain what bool...",infini-instruct-top-500k,5.14754
3,"[{'from': 'human', 'value': 'Explain the conce...",infini-instruct-top-500k,5.053656
4,"[{'from': 'human', 'value': 'Print the reverse...",infini-instruct-top-500k,5.045648


In [None]:
dataset[0]

{'conversations': [{'from': 'human',
   'value': 'Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.'},
  {'from': 'gpt

In [None]:
features = list(dataset.features.keys())
print(features)

['conversations', 'source', 'score']


## 05 Text Formatting

In [None]:
def transform_conversations(example):
  """Convert ShareGPT-style turns ('from'/'value') into 'role'/'content' turns.

  The speaker labels 'human' and 'gpt' are renamed to 'user' and 'assistant';
  any other label is passed through unchanged. Returns a dict whose
  'conversations' key holds the converted list of turns.
  """
  renamed_roles = {'human' : 'user', 'gpt' : 'assistant'}
  turns = []
  for message in example['conversations']:
    speaker = message['from']
    turns.append({
      'role' : renamed_roles.get(speaker, speaker),
      'content' : message['value'],
    })
  return {'conversations': turns}

In [None]:
formatted_dataset = dataset.map(transform_conversations, remove_columns = features)
formatted_dataset

Dataset({
    features: ['conversations'],
    num_rows: 100000
})

In [None]:
print(formatted_dataset[0]['conversations'])

[{'content': 'Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.', 'role': 'user'}, {'content': 'Boolean operators are 

In [None]:
def format_conversation(example):
  """Flatten a chat transcript into one training string.

  Every turn in `example['conversations']` (dicts with 'role' and 'content')
  is rendered as a header block, its content and an `<|eot_id|>` terminator,
  and the blocks are concatenated in their original order.

  All turns are kept, whatever their role: earlier versions restarted the
  text at each user turn (losing the preceding turns of multi-turn chats),
  skipped system turns, and failed when a conversation did not start with a
  user turn.

  Returns:
    A dict with the key 'prompt' holding the concatenated text (an empty
    string when the conversation has no turns).

  NOTE(review): the header/terminator markers look like a Llama-3 style
  template; they appear to be tokenized as ordinary text by the tokenizer
  loaded in this notebook rather than as special tokens. Consider rendering
  with `tokenizer.apply_chat_template` so training matches inference — confirm.
  """
  segments = [
    f"<|start_header_id|>{entry['role']}<|end_header_id|>\n\n{entry['content']}\n<|eot_id|>"
    for entry in example['conversations']
  ]
  return {'prompt': ''.join(segments)}

In [None]:
formatted_dataset = formatted_dataset.map(
  format_conversation,
  remove_columns = list(formatted_dataset.features.keys())
)
formatted_dataset

Dataset({
    features: ['prompt'],
    num_rows: 100000
})

In [None]:
print(formatted_dataset[0]['prompt'])

<|start_header_id|>user<|end_header_id|>

Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. 

Furthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.

Finally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.
<|eot_id|><|start_header_id|>a

## 06 Tokenization

In [None]:
def tokenize_data(example, max_length = max_length):
  """Tokenize the 'prompt' field, truncating and padding to `max_length` tokens.

  `max_length` defaults to the value the module-level `max_length` had when
  this function was defined. Returns whatever the tokenizer call returns.
  """
  tokenizer_options = {
    'truncation' : True,
    'padding' : 'max_length',
    'max_length' : max_length,
  }
  return tokenizer(example['prompt'], **tokenizer_options)

In [None]:
tokenized_dataset = formatted_dataset.map(tokenize_data, batched = True)#, remove_columns = 'text')
tokenized_dataset

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Dataset({
    features: ['prompt', 'input_ids', 'attention_mask'],
    num_rows: 100000
})

In [None]:
print(tokenized_dataset[0]['prompt'])

<|start_header_id|>user<|end_header_id|>

Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. 

Furthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.

Finally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.
<|eot_id|><|start_header_id|>a

In [None]:
dataset = tokenized_dataset.train_test_split(test_size = 0.1, seed = 42)
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'input_ids', 'attention_mask'],
        num_rows: 90000
    })
    test: Dataset({
        features: ['prompt', 'input_ids', 'attention_mask'],
        num_rows: 10000
    })
})

In [None]:
train_dataset = dataset['train']
test_dataset = dataset['test']
train_dataset

Dataset({
    features: ['prompt', 'input_ids', 'attention_mask'],
    num_rows: 90000
})

In [None]:
train_dataset.select(range(5)).to_pandas().head()

Unnamed: 0,prompt,input_ids,attention_mask
0,<|start_header_id|>user<|end_header_id|>\n\nWh...,"[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,<|start_header_id|>user<|end_header_id|>\n\nUs...,"[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
2,<|start_header_id|>user<|end_header_id|>\n\nCr...,"[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,<|start_header_id|>user<|end_header_id|>\n\nWr...,"[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,<|start_header_id|>user<|end_header_id|>\n\nTh...,"[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91,...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [None]:
print(train_dataset[0]['prompt'])

<|start_header_id|>user<|end_header_id|>

What is the algorithm to detect if a linked list contains a cycle, and if a cycle is present, determine the node at which the cycle begins?
<|eot_id|><|start_header_id|>assistant<|end_header_id|>

The algorithm to detect cycle in linked list is famously known as Floyd's Cycle-Finding Algorithm, or the Tortoise and the Hare algorithm.

Here's how the algorithm works:

1. Initialize two pointers, slow and fast at the head of the linked list.
2. Move slow pointer by one and fast pointer by two. If there's a cycle in the list, the fast pointer will eventually meet the slow pointer.
3. When they meet, reset the slow pointer to the head while leaving the fast pointer at the meeting point.
4. Now, advance both slow and fast pointers at the same pace, one step at a time. The point at which they meet now is the start of the cycle.

Here's the code in Python:

```python
def detectCycle(head):
    slow = fast = head
    while fast and fast.next:
        s

In [None]:
print(train_dataset[0]['input_ids'])

[27, 91, 2468, 8757, 842, 91, 29, 872, 27, 91, 408, 8757, 842, 91, 1339, 3838, 374, 279, 12111, 311, 11140, 421, 264, 10592, 1140, 5610, 264, 10775, 11, 323, 421, 264, 10775, 374, 3042, 11, 8253, 279, 2436, 518, 892, 279, 10775, 12033, 5267, 27, 91, 68, 354, 842, 91, 1784, 91, 2468, 8757, 842, 91, 29, 77091, 27, 91, 408, 8757, 842, 91, 1339, 785, 12111, 311, 11140, 10775, 304, 10592, 1140, 374, 50187, 3881, 438, 45799, 594, 41292, 7276, 3961, 40325, 11, 476, 279, 55827, 68189, 323, 279, 472, 546, 12111, 382, 8420, 594, 1246, 279, 12111, 4278, 1447, 16, 13, 9008, 1378, 27454, 11, 6301, 323, 4937, 518, 279, 1968, 315, 279, 10592, 1140, 624, 17, 13, 14561, 6301, 7445, 553, 825, 323, 4937, 7445, 553, 1378, 13, 1416, 1052, 594, 264, 10775, 304, 279, 1140, 11, 279, 4937, 7445, 686, 9583, 3367, 279, 6301, 7445, 624, 18, 13, 3197, 807, 3367, 11, 7585, 279, 6301, 7445, 311, 279, 1968, 1393, 9380, 279, 4937, 7445, 518, 279, 6438, 1459, 624, 19, 13, 4695, 11, 11912, 2176, 6301, 323, 4937, 27454, 

In [None]:
print(train_dataset[0]['attention_mask'])

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

## 07 Data Collator Set Up

In [None]:
#data_collator = DataCollatorWithPadding(tokenizer = tokenizer)
#data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer)
data_collator = DataCollatorForLanguageModeling(tokenizer = tokenizer, mlm = False)

## 08 Evaluation Metrics Set Up

In [None]:
def compute_metrics(p: EvalPrediction):
  """Compute accuracy and weighted precision / recall / F1 for an evaluation pass.

  Predicted labels are the argmax of `p.predictions` along axis 1 and are
  compared with `p.label_ids`.
  NOTE(review): the axis-1 argmax presumably expects 2-D (samples, classes)
  scores, i.e. a classification head — confirm before using it with this
  causal-LM trainer.

  Returns a dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
  """
  predicted_labels = np.argmax(p.predictions, axis = 1)
  true_labels = p.label_ids
  weighted_scores = precision_recall_fscore_support(
    true_labels,
    predicted_labels,
    average = 'weighted'
  )
  # The fourth element (support) is not reported.
  precision, recall, f1 = weighted_scores[:3]
  return {
    'accuracy': accuracy_score(true_labels, predicted_labels),
    'f1': f1,
    'precision': precision,
    'recall': recall,
  }

In [None]:
torch.cuda.empty_cache()

## 09 Set Up PEFT / LoRA / QLoRA

In [None]:
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                  "gate_proj", "up_proj", "down_proj",]
peft_config = LoraConfig(
  lora_alpha = lora_alpha,
  lora_dropout = lora_dropout,
  r = lora_r,
  bias = 'none',
  task_type = 'CAUSAL_LM',
  target_modules = target_modules,
)

In [None]:
# Wrap the quantized base model with a LoRA adapter registered under the name
# 'math' and report how many parameters that leaves trainable.
# NOTE(review): the adapter is injected into `model` itself (the `model` repr in
# the next cell already shows the `math` LoRA layers), while the trainer further
# down is given `model` together with `peft_config` again and the reloaded model
# lists a second `default` adapter. Presumably only one adapter is intended —
# confirm which one is actually trained and saved.
peft_model = get_peft_model(model, peft_config, adapter_name = 'math')
peft_model.print_trainable_parameters()

trainable params: 35,192,832 || all params: 529,225,600 || trainable%: 6.6499


## 10 Training Model

In [None]:
model

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2SdpaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=896, out_features=896, bias=True)
            (lora_dropout): ModuleDict(
              (math): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (math): Linear(in_features=896, out_features=64, bias=False)
            )
            (lora_B): ModuleDict(
              (math): Linear(in_features=64, out_features=896, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=896, out_features=128, bias=True)
            (lora_dropout): ModuleDict(
              (math):

In [None]:
# Repeat the parameter census now that LoRA layers have been injected into `model`.
# NOTE(review): the total printed here differs from the "all params" figure that
# `print_trainable_parameters` reported above; presumably the packed 4-bit
# weights are counted differently by `numel()` — confirm before quoting numbers.
total_params = sum(weight.numel() for weight in model.parameters())
trainable_params = sum(
  weight.numel()
  for weight in filter(lambda w: w.requires_grad, model.parameters())
)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 350312320
Trainable parameters : 35192832
Trainable percentage: 10.05%


In [None]:
torch.cuda.empty_cache()

In [None]:
# Directory used both as the trainer's output_dir and, later, as the adapter save location.
save_path = './model'

# With gradient accumulation of 4 the effective train batch is batch_size * 4.
batch_size = 2
max_steps = 200
training_args = TrainingArguments(
  output_dir = save_path,
  gradient_accumulation_steps = 4,
  # `eval_strategy` is the current name of the deprecated `evaluation_strategy`
  # argument; the TrainingArguments repr already reports it under this name.
  eval_strategy = 'steps',
  do_eval = True,
  per_device_train_batch_size = batch_size,
  per_device_eval_batch_size = 4,
  log_level = 'debug',
  # No intermediate checkpoints are written; the adapter is saved explicitly later.
  save_strategy = 'no',
  save_total_limit = 2,
  save_safetensors = False,
  fp16 = False,
  logging_steps = 20,
  learning_rate = 2e-5,
  eval_steps = 20,
  max_steps = max_steps,
  warmup_steps = 30,
  lr_scheduler_type = 'cosine',
)
training_args



TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=20,
eval_strategy=steps,
eval_use_gather_object=F

In [None]:
# Build the supervised fine-tuning trainer: the full train split is used for
# training and the first 1000 rows of the test split for periodic evaluation.
# NOTE(review): `dataset_text_field`, `max_seq_length` and `tokenizer` trigger the
# "use the SFTConfig" deprecation message printed below — check the installed
# TRL version before upgrading.
# NOTE(review): `model` already carries the LoRA layers injected by the earlier
# `get_peft_model` call, and `peft_config` is passed again here; presumably this
# is what adds the second (`default`) adapter — confirm.
# NOTE(review): the `data_collator` and `compute_metrics` defined in earlier
# cells are not passed to this trainer.
trainer = SFTTrainer(
  model = model,
  train_dataset = train_dataset,#.select(range(10000)),
  eval_dataset = test_dataset.select(range(1000)),
  dataset_text_field = 'prompt',
  max_seq_length = max_length,
  tokenizer = tokenizer,
  args = training_args,
  peft_config = peft_config,
)
trainer


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


<trl.trainer.sft_trainer.SFTTrainer at 0x7fb0af7da110>

In [None]:
trainer.train()

Currently training with a batch size of: 2
The following columns in the training set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 90,000
  Num Epochs = 1
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 4
  Total optimization steps = 200
  Number of trainable parameters = 35,192,832
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mterlupakan100[0m ([33mterlupakan100-[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss,Validation Loss
20,1.3455,1.312746


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


Step,Training Loss,Validation Loss
20,1.3455,1.312746
40,1.2342,1.218575
60,1.1672,1.095166
80,1.096,1.053129
100,1.0043,1.035356
120,1.0822,1.023565
140,1.0116,1.014683
160,1.0256,1.009678
180,0.9756,1.007857
200,1.0031,1.007593


The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4
The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4
The following columns in the evaluation set don't have 

TrainOutput(global_step=200, training_loss=1.094542646408081, metrics={'train_runtime': 3667.9152, 'train_samples_per_second': 0.436, 'train_steps_per_second': 0.055, 'total_flos': 4210200831590400.0, 'train_loss': 1.094542646408081, 'epoch': 0.017777777777777778})

## 11 Model Evaluation

In [None]:
evaluation_results = trainer.evaluate()
print('Evaluation Results:', evaluation_results)

The following columns in the evaluation set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: prompt. If prompt are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1000
  Batch size = 4


Evaluation Results: {'eval_loss': 1.0075932741165161, 'eval_runtime': 253.4713, 'eval_samples_per_second': 3.945, 'eval_steps_per_second': 0.986, 'epoch': 0.017777777777777778}


## 12 Save Model

In [None]:
# Unwrap the trainer's model when it exposes a `.module` attribute (otherwise use
# it directly), then persist it to `save_path` through `save_pretrained`.
save_model = getattr(trainer.model, 'module', trainer.model)
save_model.save_pretrained(save_path)

## 13 Load PEFT Model

In [None]:
torch.cuda.empty_cache()

In [None]:
peft_model = PeftModel.from_pretrained(model, save_path)
peft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 896)
        (layers): ModuleList(
          (0-23): 24 x Qwen2DecoderLayer(
            (self_attn): Qwen2SdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=896, out_features=896, bias=True)
                (lora_dropout): ModuleDict(
                  (math): Dropout(p=0.1, inplace=False)
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (math): Linear(in_features=896, out_features=64, bias=False)
                  (default): Linear(in_features=896, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (math): Linear(in_features=64, out_features=896, bias=False)
                  (default): Linear(in_features=64, out_features=896, bias=False)
        

In [None]:
# Final parameter census, taken on `model` after the saved adapter was loaded back.
total_params = sum(weight.numel() for weight in model.parameters())
trainable_params = sum(
  weight.numel()
  for weight in filter(lambda w: w.requires_grad, model.parameters())
)
trainable_percentage = (trainable_params / total_params) * 100

print('Total parameters :', total_params)
print('Trainable parameters :', trainable_params)
print('Trainable percentage: {:.2f}%'.format(trainable_percentage))

Total parameters : 385505152
Trainable parameters : 0
Trainable percentage: 0.00%


## 14 Post Test

In [None]:
def assistant(prompt):
  '''
  Generate a reply to `prompt` with `peft_model` and print the decoded text.

  The prompt is wrapped as a single user turn, rendered and tokenized through
  the tokenizer's chat template, and passed to `peft_model.generate`. The
  printed text is the full decoded sequence: the templated prompt, the
  generated reply, and any special tokens.

  Args:
    prompt: Text of the user message.

  Returns:
    None. The decoded text is printed, not returned.
  '''
  messages = [
    # The role has to be 'user'. With the previous 'human' role the decoded
    # output contained only the system block and the assistant header, i.e.
    # the chat template dropped the turn and the model never saw the prompt.
    {'role' : 'user', 'content' : prompt},
  ]
  inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = 'pt',
  ).to(peft_model.device)  # follow the model's device instead of assuming 'cuda'
  generation_config = GenerationConfig(
    # Sampling restricted to the single most likely token (top_k = 1), so
    # decoding is effectively greedy despite do_sample = True.
    do_sample = True,
    top_k = 1,
    temperature = 0.1,
    max_new_tokens = 1024,
    pad_token_id = tokenizer.eos_token_id
  )
  outputs = peft_model.generate(
    input_ids = inputs,
    generation_config = generation_config
  )
  # Special tokens are deliberately kept in the printed text so the rendered
  # chat template can be inspected.
  print(tokenizer.decode(outputs[0]))

In [None]:
# Post-test prompt: summarize a legal text.
# NOTE(review): the single quote opened before "In the case of" is never
# closed inside the prompt text; confirm whether that is intended.
prompt = '''
Summarize the following legal text in a few sentences:

'In the case of John Doe v. XYZ Corp, the plaintiff, John Doe, entered into a formal service contract with XYZ Corp in January 2022. The agreement stipulated a one-year commitment for IT support services, with John Doe providing on-site troubleshooting, software updates, and system maintenance. XYZ Corp agreed to pay a fixed monthly retainer along with additional fees for after-hours support. However, in June 2022, XYZ Corp terminated the contract without prior notice, claiming that an unexpected downturn in business operations left them financially unable to continue. The plaintiff alleges wrongful termination, asserting that XYZ Corp failed to adhere to the 60-day notice clause outlined in the contract. Additionally, the plaintiff contends that the early termination damaged his professional reputation and resulted in significant financial losses, including missed client opportunities and incurred expenses for certifications specific to XYZ Corp’s systems. John Doe is seeking compensation for the remaining contract balance, damages for reputational harm, and reimbursement for training and certification costs required under the agreement.
'''
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?
🤨
You are a helpful assistant.🤨
AILABLE
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful a

In [None]:
# Post-test prompt: open-ended legal question about breaching a confidentiality agreement.
prompt = "What are the legal implications if a party violates a confidentiality agreement in the context of contract law? For example, consider a scenario where a contractor working with Tech Innovators Inc. shares proprietary technology information with a competitor. Explain in detail and cite relevant case law where possible."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?
🤨
You are a helpful assistant.🤨
AILABLE
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful a

In [None]:
# Post-test prompt: rewrite a non-compete clause in plain language.
prompt = "Rephrase the following legal statement to make it more understandable for a general audience: 'Under the terms of the non-compete agreement, the defendant is barred from engaging in any business that competes with the plaintiff's business within a 50-mile radius for two years following the termination of employment.' Retain all key information while simplifying the language."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?
🤨
You are a helpful assistant.🤨
AILABLE
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful a

In [None]:
# Post-test prompt: explain how the force majeure principle applies to a failed delivery.
prompt = "In the case where a defendant claims breach of contract due to unforeseeable events, how does the principle of 'force majeure' apply? For instance, if a company was unable to deliver contracted goods due to a natural disaster, provide a detailed explanation and outline any relevant conditions under which the force majeure principle might or might not be applicable."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?
🤨
You are a helpful assistant.🤨
AILABLE
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful a

In [None]:
# Post-test prompt: build a defence argument around educational fair use.
prompt = "Construct an argument in defense of a client accused of breaching intellectual property laws due to sharing copyrighted material in an educational setting. For example, a teacher shares portions of a textbook with students to support classroom discussion. Focus on any legal exceptions or defenses that may apply, such as the fair use doctrine in educational contexts."
assistant(prompt)

<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>assistant
Sure, I can help you with that. What would you like to know?
🤨
You are a helpful assistant.🤨
AILABLE
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful assistant.🤨
Available
You are a helpful a