In [1]:
# !pip install transformers
# !pip install accelerate
# !pip install -U bitsandbytes
# !pip install datasets
# !pip install trl
# !pip install unsloth

In [2]:
import unsloth

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
from huggingface_hub import login

hf_token = '' # use your HF token
login(hf_token)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "google/gemma-3-270m-it"
# model_name = "cike-dev/GemmaBullyClassifier-e"
# model_name = "cike-dev/Gemma_3_Toxic_Classifier" # works fine
# model_name = "cike-dev/GemmaToxicClassifier"
# # model_name = "cike-dev/GemmaToxicity"

# Auto GPU + dtype handling
device_map = "auto"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

# Optional: bitsandbytes config (like unsloth’s flags)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=False,
    load_in_8bit=False,
    bnb_4bit_compute_dtype=torch_dtype,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    dtype=torch_dtype,
    device_map=device_map,
    quantization_config=bnb_config,
    token=hf_token,
)

# Patch chat template (same as unsloth)
tokenizer.chat_template = """{{ bos_token }}{% for message in messages %}
<start_of_turn>{{message['role']}}
{{message['content']}}<end_of_turn>
{% endfor %}{% if add_generation_prompt %}<start_of_turn>model
{% endif %}"""


In [5]:
print(model.config.max_position_embeddings)
print(model.device)
print(tokenizer.chat_template)

32768
cuda:0
{{ bos_token }}{% for message in messages %}
<start_of_turn>{{message['role']}}
{{message['content']}}<end_of_turn>
{% endfor %}{% if add_generation_prompt %}<start_of_turn>model
{% endif %}


### Testing classification ability of the model before finetuning:

In [6]:
messages = [
	# {"role": "system",
	# "content": "Classify the text as 'toxic' or 'normal'. Output only one word."},
    {"role": "user", 
	"content": "Classify the text as 'toxic' or 'normal'. Output only one word. Text: 'you either stand with your people or they will stand over your dead body .'"},
]

inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)
# )
# inputs
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

While the text is described as "toxic," it's not actually "toxic." It's purely descriptive and doesn't contain any actual life experiences.<end_of_turn>


In [7]:
# raw_set = load_dataset("cike-dev/en_toxic_set", split='test')


# for i in raw_set.select(range(2)):
#     print(i['cleaned_text'])

# raw_set['cleaned_text'][100]

### Few-shot testing

using prompt engineering to steer the model's tone can produce unpredictable and may not always align with the expected persona.

In [8]:
from transformers import pipeline
from datasets import load_dataset

# Load the model and tokenizer into the pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


raw_set = load_dataset("cike-dev/en_toxic_set", split='test')


message = [
    # give persona
    {"role": "system", "content": "You are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'."},
]


# few shot prompt
for item in raw_set.select(range(2)):
  message.append(
      {"role": "user", "content": item["cleaned_text"]}
  )
  message.append(
      {"role": "assistant", "content": item["label"]}
  )

# actual question
message.append(
    {"role": "user", "content": raw_set["cleaned_text"][100]}
)

outputs = pipe(message, max_new_tokens=256, disable_compile=True)
print(outputs[0]['generated_text'])
print("-"*80)
print(outputs[0]['generated_text'][-1]['content'])

Device set to use cuda:0


[{'role': 'system', 'content': "You are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'."}, {'role': 'user', 'content': 'the fundamental basis of free speech is the marketplace of ideas.'}, {'role': 'assistant', 'content': 'normal'}, {'role': 'user', 'content': 'i do not silence anybody that kike shit not only do i not silence people i give them a voice on my open phones talk show the fact that there are so many people deserving of my scorn is a separate topic entirely'}, {'role': 'assistant', 'content': 'toxic'}, {'role': 'user', 'content': 'fire her nasty ass.'}, {'role': 'assistant', 'content': 'I am a large language model this response is a response based on your previous response. I am ready to receive your response and then to provide helpful information and assistance in a way that is respectful and appropriate.'}]
--------------------------------------------------------------------------------
I am a large language model this response is

In [9]:
inputs

{'input_ids': tensor([[     2,    105,   2364,    107,   4335,   1891,    506,   1816,    618,
            756,  72401, 236789,    653,    756,   7382,   6748,  16887,   1186,
            886,   3658, 236761,   5590, 236787,    756,   7624,   3477,   1975,
            607,    822,   1331,    653,    901,    795,   1975,   1024,    822,
           6582,   2742, 209442,    106,    107,    105,   4368,    107]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}

# Training

In [10]:
from unsloth import FastLanguageModel

model_name = "google/gemma-3-270m-it"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    max_seq_length=256,
    dtype=torch.bfloat16,  # auto detect bf16/fp16
    load_in_4bit = False,  # 4 bit quantization to reduce memory
    load_in_8bit = False, # A bit more accurate, uses 2x memory
    full_finetuning = True,  # full fine-tuning
)


==((====))==  Unsloth 2025.9.4: Fast Gemma3_Text patching. Transformers: 4.56.1.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.278 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using bfloat16 full finetuning which cuts memory usage by 50%.


In [11]:
# ===========================
# 2. Load Dataset
# ===========================
from datasets import load_dataset


dataset = load_dataset("cike-dev/en_toxic_set")
# train split
train_dataset = dataset["train"]
# train_dataset = train_dataset.select(range(10000))

eval_dataset = dataset["validation"]
eval_dataset = eval_dataset.select(range(4000))
# test_dataset = dataset["test"]


print(train_dataset[3000])


# Obtain the correct chat template
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma3",
)


{'cleaned_text': 'methinks grahamnasty has seen the writing on the wall after the mark sanford loss...', 'label': 'normal'}


In [12]:
def convert_to_chatml(example):
    system_message = "You are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'."
    user_prompt = f"Classify this text: '{example['cleaned_text']}'"
    
    return {
        "conversations": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": example["label"]},
        ]
    }


train_set = train_dataset.map(convert_to_chatml)
eval_set = eval_dataset.map(convert_to_chatml)

display(train_set['conversations'][1])
print()
display(eval_set['conversations'][1])

[{'content': "You are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'.",
  'role': 'system'},
 {'content': "Classify this text: 'the definition of traitor is then the issue  say you did business with a jew, but nothing special, does that make you a traitor, etc.'",
  'role': 'user'},
 {'content': 'normal', 'role': 'assistant'}]




[{'content': "You are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'.",
  'role': 'system'},
 {'content': "Classify this text: 'same in the uk election postal vote is exploited by muslims in particular female muslims give their vote to whomever the imam says if it a muslim candidate they just bring voters in register from a muslim house and job done'",
  'role': 'user'},
 {'content': 'toxic', 'role': 'assistant'}]

In [13]:
def formatting_prompts_func(examples):
   msgs = examples["conversations"]
   texts = [tokenizer.apply_chat_template(msg, tokenize = False, add_generation_prompt = False).removeprefix('<bos>') for msg in msgs]
   return { "text" : texts, }

# Apply the function to the datasets

train_set = train_set.map(formatting_prompts_func, batched=True)

eval_set = eval_set.map(formatting_prompts_func, batched=True)

display(train_set['text'][1])
print()
display(eval_set['text'][1])

"<start_of_turn>user\nYou are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'.\n\nClassify this text: 'the definition of traitor is then the issue  say you did business with a jew, but nothing special, does that make you a traitor, etc.'<end_of_turn>\n<start_of_turn>model\nnormal<end_of_turn>\n"




"<start_of_turn>user\nYou are a binary classifier for cyberbullying speech. Respond with one word: 'toxic' or 'normal'.\n\nClassify this text: 'same in the uk election postal vote is exploited by muslims in particular female muslims give their vote to whomever the imam says if it a muslim candidate they just bring voters in register from a muslim house and job done'<end_of_turn>\n<start_of_turn>model\ntoxic<end_of_turn>\n"

In [14]:
# ===========================
# 4. Training Config
# ===========================
import os
from trl import SFTConfig, SFTTrainer


saved_model = "./Gemma3ToxicTextClassifier"

os.makedirs(saved_model, exist_ok=True)
torch_dtype = model.dtype

sft_args = SFTConfig(
    dataset_text_field="text",      # dataset column with input text
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    gradient_checkpointing=False,
    warmup_ratio=0.1,
    num_train_epochs=5,
    max_length=256,
    learning_rate=3e-5,
    logging_steps=500,                      # avoid logging every step
    optim="adamw_torch_fused",              # fused optimizer
    weight_decay=0.01,
    lr_scheduler_type="constant",           # conctant, linear, or cosine
    seed=2025,
    output_dir=saved_model,
    report_to="none",
    bf16=torch_dtype == torch.bfloat16,                              # use bf16 if GPU supports, else fp16=True
    fp16=torch_dtype == torch.float16,
    # metric_for_best_model="eval_loss",    # or f1_score, is computed correctly
    # greater_is_better=False,
    # load_best_model_at_end=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    save_safetensors=True,
    dataset_kwargs={
        "add_special_tokens": False, # Template with special tokens
        "append_concat_token": True, # Add EOS token as separator token between examples
    }
)

trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=train_set,
    eval_dataset=eval_set,
    args=sft_args,
    # formatting_func = formatting_func,**kwargs
)

torch_dtype == torch.float16

False

In [15]:
# ===========================
# 5. Train
# ===========================
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 76,259 | Num Epochs = 5 | Total steps = 47,665
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 1 x 1) = 8
 "-____-"     Trainable parameters = 268,098,176 of 268,098,176 (100.00% trained)


Epoch,Training Loss,Validation Loss
1,1.6889,1.65173
2,1.6127,1.643584
3,1.5735,1.650423
4,1.5221,1.658994
5,1.5043,1.669838


Unsloth: Will smartly offload gradients to save VRAM!


In [21]:
trainer_stats

TrainOutput(global_step=47665, training_loss=1.5785336285463776, metrics={'train_runtime': 10735.0355, 'train_samples_per_second': 35.519, 'train_steps_per_second': 4.44, 'total_flos': 3.016052380955443e+16, 'train_loss': 1.5785336285463776, 'epoch': 5.0})

In [16]:
trainer.save_model(saved_model)

In [17]:
tokenizer.save_pretrained(saved_model)

('./Gemma3ToxicTextClassifier/tokenizer_config.json',
 './Gemma3ToxicTextClassifier/special_tokens_map.json',
 './Gemma3ToxicTextClassifier/chat_template.jinja',
 './Gemma3ToxicTextClassifier/tokenizer.model',
 './Gemma3ToxicTextClassifier/added_tokens.json',
 './Gemma3ToxicTextClassifier/tokenizer.json')

In [18]:
trainer.push_to_hub("cike-dev/Gemma3ToxicTextClassifier")

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...ToxicTextClassifier/tokenizer.model:  97%|#########7| 4.55MB / 4.69MB            

  ...xicTextClassifier/model.safetensors:   0%|          |  612kB /  536MB            

  ...3ToxicTextClassifier/tokenizer.json:  97%|#########7| 32.4MB / 33.4MB            

  ...xicTextClassifier/training_args.bin: 100%|##########| 6.35kB / 6.35kB            

CommitInfo(commit_url='https://huggingface.co/cike-dev/Gemma3ToxicTextClassifier/commit/c75d7b607c8a58d344732058b3b9c0661fc3d815', commit_message='cike-dev/Gemma3ToxicTextClassifier', commit_description='', oid='c75d7b607c8a58d344732058b3b9c0661fc3d815', pr_url=None, repo_url=RepoUrl('https://huggingface.co/cike-dev/Gemma3ToxicTextClassifier', endpoint='https://huggingface.co', repo_type='model', repo_id='cike-dev/Gemma3ToxicTextClassifier'), pr_revision=None, pr_num=None)

In [20]:
import matplotlib.pyplot as plt

# Access the log history
log_history = trainer._state.log_history

# Extract training / validation loss
train_losses = [log["loss"] for log in log_history if "loss" in log]
epoch_train = [log["epoch"] for log in log_history if "loss" in log]
eval_losses = [log["eval_loss"] for log in log_history if "eval_loss" in log]
epoch_eval = [log["epoch"] for log in log_history if "eval_loss" in log]

# Plot the training loss
plt.plot(epoch_train, train_losses, label="Training Loss")
plt.plot(epoch_eval, eval_losses, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss per Epoch")
plt.legend()
plt.grid(True)
plt.show()

AttributeError: 'UnslothSFTTrainer' object has no attribute '_state'

In [18]:
# # ===========================
# # 6. Batched Evaluation
# # ===========================
# def batched_eval(trainer, dataset, batch_size=16, max_new_tokens=8):
#     model = trainer.model
#     tok = trainer.tokenizer
#     device = next(model.parameters()).device
#     model.eval()

#     texts = [ex["cleaned_text"] for ex in dataset]
#     labels = [1 if ex["label"].lower()=="toxic" else 0 for ex in dataset]

#     preds = []
#     for i in range(0, len(texts), batch_size):
#         batch_texts = texts[i: i+batch_size]
#         enc = tok(batch_texts, return_tensors="pt", padding=True,
#                   truncation=True, max_length=128)
#         enc = {k:v.to(device) for k,v in enc.items()}

#         with torch.no_grad():
#             outputs = model.generate(
#                 **enc,
#                 max_new_tokens=max_new_tokens,
#                 do_sample=False,
#                 temperature=0.0,
#                 pad_token_id=tok.pad_token_id,
#                 eos_token_id=tok.eos_token_id,
#             )

#         input_lens = (enc["input_ids"] != tok.pad_token_id).sum(dim=1).tolist()
#         for out_ids, in_len in zip(outputs.cpu().tolist(), input_lens):
#             gen_ids = out_ids[in_len:] if len(out_ids)>in_len else []
#             decoded_gen = tok.decode(gen_ids, skip_special_tokens=True).strip().lower()

#             if "toxic" in decoded_gen:
#                 preds.append(1)
#             elif "normal" in decoded_gen:
#                 preds.append(0)
#             else:
#                 # fallback: last token
#                 toks = decoded_gen.split()
#                 if toks and toks[-1].startswith("toxic"): preds.append(1)
#                 elif toks and toks[-1].startswith("normal"): preds.append(0)
#                 else: preds.append(0)  # default to normal

#     return preds, labels

# # Run evaluation
# preds, labels = batched_eval(trainer, eval_dataset, batch_size=16)

# acc = accuracy_score(labels, preds)
# f1 = f1_score(labels, preds)
# print(f"\nValidation Accuracy: {acc:.4f}, F1: {f1:.4f}")
# print(classification_report(labels, preds, target_names=["normal","toxic"], digits=4))

# # Confusion matrix
# cm = confusion_matrix(labels, preds)
# plt.figure(figsize=(5,5))
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", square=True,
#             xticklabels=["normal","toxic"], yticklabels=["normal","toxic"],
#             annot_kws={"size":14})
# plt.ylabel("True Label")
# plt.xlabel("Predicted Label")
# plt.title("Confusion Matrix - Validation Set")
# plt.tight_layout()
# plt.savefig(os.path.join(saved_model,"confusion_matrix.png"), dpi=300)
# plt.show()
# plt.close

In [1]:
model.push_to_hub("cike-dev/Gemma3ToxicTextClassifier-bkp")
tokenizer.push_to_hub("cike-dev/Gemma3ToxicTextClassifier-bkp")

NameError: name 'model' is not defined