In [5]:
import pandas as pd
from sklearn.datasets import fetch_20newsgroups

from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset

categories = ['sci.space', 'rec.autos', 'comp.graphics']

# Check if data already exists to avoid re-fetching
import os
if os.path.exists('newsgroups_data.csv'):
    print("Loading existing data...")
    data = pd.read_csv('newsgroups_data.csv')
else:
    print("Fetching data...")
    newsgroups = fetch_20newsgroups(subset='all', categories=categories)
    data = pd.DataFrame({'text': newsgroups.data, 'label': newsgroups.target})
    data.to_csv('newsgroups_data.csv', index=False)
    print("Data saved to newsgroups_data.csv")

train_data, temp_data = train_test_split(data, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

print(f"Train size: {len(train_data)}, Validation size: {len(val_data)}, Test size: {len(test_data)}"   )

Loading existing data...
Train size: 2065, Validation size: 442, Test size: 443


In [6]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=512)

#convert to datasets and tokenize
train_dataset = Dataset.from_pandas(train_data)
val_dataset = Dataset.from_pandas(val_data)
test_dataset = Dataset.from_pandas(test_data)   

#apply tokenization
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

#keep only necessary columns
columns_to_remove = [col for col in train_dataset.column_names if col not in ['input_ids', 'attention_mask', 'label']]

train_dataset = train_dataset.remove_columns(columns_to_remove)
val_dataset = val_dataset.remove_columns(columns_to_remove)
test_dataset = test_dataset.remove_columns(columns_to_remove)   

train_sample = train_dataset.shuffle(seed=42).select(range(int(0.025 * len(train_dataset))))

print("\Dataset format:")
print(train_sample[0])
print(f"\nSample size after downsampling: {len(train_sample)}")


  print("\Dataset format:")
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2065/2065 [00:06<00:00, 340.42 examples/s]

Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 442/442 [00:01<00:00, 384.64 examples/s]
Map:   0%|          | 0/443 [00:00<?, ? examples/s]
Map: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 443/443 [00:01<00:00, 327.90 examples/s]

\Dataset format:
{'label': 0, 'input_ids': [101, 2013, 1024, 11320, 10286, 4305, 1030, 20116, 1012, 6901, 1012, 3968, 2226, 1006, 5696, 11320, 10286, 4305, 1007, 3395, 1024, 2559, 2005, 2720, 1012, 10958, 5280, 3029, 1024, 2110, 2118, 1997, 2047, 2259, 2012, 6901, 1013, 4012, 2361, 16596, 3210, 1024, 1023, 1050, 3372, 2361, 1011, 14739, 1011, 3677, 1024, 7570, 2863, 2213, 1012, 20116, 1012, 6901, 1012, 3968, 2226, 2515, 3087, 2031, 1037, 10958, 5280, 10938, 1999, 1039, 2008, 2027, 2071, 4604, 2033, 1029, 2151, 2393, 3970, 1010, 1011, 1011, 1064, 1012, 1011, 1010, 1001, 1001, 1001, 1064, 2005, 1037, 2843, 1997, 1012, 8740, 2189, 1024, 3027, 2361, 4165, 1012, 17371, 6342, 1012, 3968, 2226, 1064, 1013, 1013, 1035, 1035, 1010, 1035, 1001, 1001, 1001, 1064, 2059, 4937, 5371, 1012, 8740, 1028, 1013, 16475, 1013, 5746, 1064, 1032, 1035, 1028, 1013, 1028, 1035, 1013, 1006, 1035, 1013, 1032, 1035, 1013, 1026, 1028, 1035, 1064, 1057, 2497, 3075, 12105, 1024, 10093, 7159, 22285, 1012, 9353, 6342,




In [9]:
#tradiontal fine runing
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

training_args = TrainingArguments(
    output_dir='./results_traditional',
    num_train_epochs=1,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    eval_strategy='epoch',
    logging_dir='./logs_traditional',
    logging_steps=10,
    learning_rate=2e-5,
    report_to='none' #disable wandb logging
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

train_result = trainer.train()
print("\nTraining completed.")
print(f"Training Loss: {train_result.training_loss}")


# Save the model using save_pretrained (correct method for the model)
model.save_pretrained('./traditional_finetuned_model')
tokenizer.save_pretrained('./traditional_finetuned_model')



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,1.0932,1.093007





Training completed.
Training Loss: 1.1095425568374933


('./traditional_finetuned_model\\tokenizer_config.json',
 './traditional_finetuned_model\\special_tokens_map.json',
 './traditional_finetuned_model\\vocab.txt',
 './traditional_finetuned_model\\added_tokens.json')

## LoRA fine tuning

In [5]:
%pip install peft

Collecting peft
  Downloading peft-0.17.1-py3-none-any.whl.metadata (14 kB)
Downloading peft-0.17.1-py3-none-any.whl (504 kB)
Installing collected packages: peft
Successfully installed peft-0.17.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [32]:
%pip install transformers==4.30.0 peft==0.4.0 --force-reinstall

Collecting transformers==4.30.0
  Downloading transformers-4.30.0-py3-none-any.whl.metadata (113 kB)
Collecting peft==0.4.0
  Downloading peft-0.4.0-py3-none-any.whl.metadata (21 kB)
Collecting filelock (from transformers==4.30.0)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers==4.30.0)
  Downloading huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting numpy>=1.17 (from transformers==4.30.0)
  Downloading numpy-2.3.4-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting packaging>=20.0 (from transformers==4.30.0)
  Downloading packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Collecting pyyaml>=5.1 (from transformers==4.30.0)
  Downloading pyyaml-6.0.3-cp313-cp313-win_amd64.whl.metadata (2.4 kB)
Collecting regex!=2019.12.17 (from transformers==4.30.0)
  Downloading regex-2025.11.3-cp313-cp313-win_amd64.whl.metadata (41 kB)
Collecting requests (from transformers==4.30.0)
  Downloading requests-2.3

  error: subprocess-exited-with-error
  
  Ã— Building wheel for tokenizers (pyproject.toml) did not run successfully.
  â”‚ exit code: 1
  â•°â”€> [62 lines of output]
      !!
      
              ********************************************************************************
              Please consider removing the following classifiers in favor of a SPDX license expression:
      
              License :: OSI Approved :: Apache Software License
      
              See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license for details.
              ********************************************************************************
      
      !!
        self._finalize_license_expression()
      running bdist_wheel
      running build
      running build_py
      creating build\lib.win-amd64-cpython-313\tokenizers
      copying py_src\tokenizers\__init__.py -> build\lib.win-amd64-cpython-313\tokenizers
      creating build\lib.win-amd64-cpython-313\tokenizer

In [4]:
#lora fine runing

from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from peft import LoraConfig, get_peft_model, TaskType

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

lora_config = LoraConfig(
    
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS,
)

model = get_peft_model(model, lora_config)

print("Trainable parameters after applying LoRA:")
model.print_trainable_parameters()


training_args = TrainingArguments(
    output_dir='./results_lora',
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy='epoch',
    logging_dir='./logs_lora',
    logging_steps=10,
    learning_rate=2e-5,
    report_to='none', #disable wandb logging,
    #use_cpu=False
    remove_unused_columns=False,

)


data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

#train the model
train_result = trainer.train()
print("\nTraining completed.")
print(f"Training Loss1: {train_result.training_loss}")

model.save_pretrained('./lora_finetuned_model')
tokenizer.save_pretrained('./lora_finetuned_model')



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  trainer = Trainer(


Trainable parameters after applying LoRA:
trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.2707




Epoch,Training Loss,Validation Loss
1,No log,1.133511





Training completed.
Training Loss1: 1.1161257198878698


('./lora_finetuned_model\\tokenizer_config.json',
 './lora_finetuned_model\\special_tokens_map.json',
 './lora_finetuned_model\\vocab.txt',
 './lora_finetuned_model\\added_tokens.json')

## QLora Fine Turning

#Load data and tokenize

In [8]:
#QLora fine runing

from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from peft import LoraConfig, get_peft_model, TaskType

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)


#Define LoRA configuration with explicit layer names

target_mmodules = [
    "bert.encoder.layer.0.attention.self.query",
    "bert.encoder.layer.0.attention.self.key", 
    "bert.encoder.layer.0.attention.self.value",
        "bert.encoder.layer.1.attention.self.query",
    "bert.encoder.layer.1.attention.self.key", 
    "bert.encoder.layer.1.attention.self.value"
      
]

lora_config = LoraConfig(
    
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS,
)

model = get_peft_model(model, lora_config)

print("Trainable parameters after applying LoRA:")
model.print_trainable_parameters()


training_args = TrainingArguments(
    output_dir='./results_lora',
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy='epoch',
    logging_dir='./logs_lora',
    logging_steps=10,
    learning_rate=2e-5,
    report_to='none', #disable wandb logging,
    #use_cpu=False
    remove_unused_columns=False,
    no_cuda=True,
    fp16=False,
    fp16_full_eval=False,
    dataloader_num_workers=0,
    dataloader_pin_memory=False,

)


data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer
)

#train the model
train_result = trainer.train()
print("\nTraining completed.")
print(f"Training Loss1: {train_result.training_loss}")

model.save_pretrained('./qlora_finetuned_model')
tokenizer.save_pretrained('./qlora_finetuned_model')



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  trainer = Trainer(


Trainable parameters after applying LoRA:
trainable params: 297,219 || all params: 109,781,766 || trainable%: 0.2707


Epoch,Training Loss,Validation Loss
1,No log,1.117138



Training completed.
Training Loss1: 1.144556999206543


('./qlora_finetuned_model\\tokenizer_config.json',
 './qlora_finetuned_model\\special_tokens_map.json',
 './qlora_finetuned_model\\vocab.txt',
 './qlora_finetuned_model\\added_tokens.json')

In [None]:
#compare results based on memory, lora and qlora
