In [1]:
# ------------------------ Install Required Packages ------------------------
!pip install -q unsloth peft transformers datasets accelerate rouge_score nltk evaluate polars

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.5/294.5 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.8/375.8 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.2/154.2 kB[0m [31m15.0 MB/s[0m eta 

In [2]:
# ------------------------ Import Libraries ------------------------
import torch
import numpy as np
import polars as pl
import evaluate
import nltk

from datasets import Dataset, concatenate_datasets
from unsloth import FastLanguageModel
from transformers import (
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

nltk.download("punkt")


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [4]:
# ------------------------ Load Model and Tokenizer from Unsloth ------------------------
model_name = "unsloth/SmolLM-135M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,
    dtype=torch.bfloat16,   # Full precision like original
    load_in_4bit=False     # No quantization, full fine-tuning
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

==((====))==  Unsloth 2025.6.12: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/978 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [5]:
# ------------------------ Load Dataset ------------------------
anime_configs = [
    "chainsawman", "kurokonobasuke", "onepunch", "hellsing", "frieren", "aot",
    "naruto", "dr_stone", "gundam_00", "darling-in-the-franxx",
    "berserk", "evangelion", "onepiece"
]

In [6]:
train_splits, val_splits = [], []

for cfg in anime_configs:
    df_train = pl.read_ndjson(f"hf://datasets/theblackcat102/anime-understanding-dataset/{cfg}_dev.jsonl").to_pandas()
    df_val = pl.read_ndjson(f"hf://datasets/theblackcat102/anime-understanding-dataset/{cfg}_val.jsonl").to_pandas()
    train_splits.append(Dataset.from_pandas(df_train))
    val_splits.append(Dataset.from_pandas(df_val))

train_dataset = concatenate_datasets(train_splits)
val_dataset = concatenate_datasets(val_splits)

In [7]:
# ------------------------ Format and Tokenize ------------------------
def format_and_tokenize(example):
    question = example['question']
    prompt = (f"Question: {question}\nOptions:\n"
              f"A. {example['A']}\nB. {example['B']}\n"
              f"C. {example['C']}\nD. {example['D']}\nAnswer:")
    correct = example[example['answer']]
    full_text = prompt + " " + correct

    tokens = tokenizer(
        full_text,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="pt"
    )
    labels = tokens['input_ids'].clone()
    labels[labels == tokenizer.pad_token_id] = -100
    tokens = {key: value.squeeze() for key, value in tokens.items()}
    tokens['labels'] = labels.squeeze()
    return tokens

train_dataset = train_dataset.map(format_and_tokenize, remove_columns=train_dataset.column_names)
val_dataset = val_dataset.map(format_and_tokenize, remove_columns=val_dataset.column_names)

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/130 [00:00<?, ? examples/s]

In [12]:
# ------------------------ Collator & Metrics ------------------------
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=-1) if predictions.ndim == 3 else predictions
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    rouge = evaluate.load("rouge")
    bleu_scores, perplexities = [], []

    scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
    smoothing = SmoothingFunction().method1

    for ref, pred in zip(decoded_labels, decoded_preds):
        bleu = sentence_bleu([ref.split()], pred.split(), smoothing_function=smoothing)
        bleu_scores.append(bleu)

        # Truncate inputs for perplexity calculation
        inputs = tokenizer(pred, return_tensors="pt", truncation=True, max_length=512).input_ids.to(device)
        with torch.no_grad():
            loss = model(inputs, labels=inputs).loss
            ppl = torch.exp(loss).item()
            perplexities.append(ppl)

    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    result["bleu"] = np.mean(bleu_scores)
    result["perplexity"] = np.mean(perplexities)
    return result

In [13]:
# ------------------------ Training ------------------------
training_args = TrainingArguments(
    output_dir="./unsloth_anime_qa_full_finetune",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=3e-5,
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    report_to="none",
    remove_unused_columns=False,
    fp16=False   # Full precision like original
)

In [14]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

  trainer = Trainer(


In [15]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 65 | Num Epochs = 3 | Total steps = 51
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 2 x 1) = 4
 "-____-"     Trainable parameters = 106,203,456 of 134,515,008 (78.95% trained)


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Bleu,Perplexity
1,4.685,3.125448,0.424059,0.175655,0.366603,0.417486,0.111243,81.934057
2,4.3612,3.154062,0.425572,0.175106,0.366268,0.418487,0.110095,95.622818
3,4.3678,3.143355,0.424268,0.174449,0.36488,0.41759,0.110122,71.851273


TrainOutput(global_step=51, training_loss=4.477564708859313, metrics={'train_runtime': 77.9865, 'train_samples_per_second': 2.5, 'train_steps_per_second': 0.654, 'total_flos': 63620118282240.0, 'train_loss': 4.477564708859313, 'epoch': 3.0})

In [16]:
trainer.evaluate()

{'eval_loss': 3.143355131149292,
 'eval_rouge1': 0.42426765240440145,
 'eval_rouge2': 0.1744488743715957,
 'eval_rougeL': 0.3648801201926596,
 'eval_rougeLsum': 0.4175900864841784,
 'eval_bleu': 0.11012168727939944,
 'eval_perplexity': 71.8512730029913,
 'eval_runtime': 18.6907,
 'eval_samples_per_second': 6.955,
 'eval_steps_per_second': 3.478,
 'epoch': 3.0}

In [17]:
# ------------------------ Save Final Model ------------------------
trainer.save_model("./unsloth_smollm_finetuned/final_model")
tokenizer.save_pretrained("./unsloth_smollm_finetuned/final_model")
print("✅ Fine-tuning completed and model saved!")

✅ Fine-tuning completed and model saved!


In [19]:
# ------------------------ Test the Fine-Tuned Model ------------------------
fine_tuned_model, fine_tuned_tokenizer = FastLanguageModel.from_pretrained(
    "./unsloth_smollm_finetuned/final_model",
    max_seq_length=512,
    dtype=torch.bfloat16,
    load_in_4bit=False
)

==((====))==  Unsloth 2025.6.12: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 8.0. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [20]:
fine_tuned_model = fine_tuned_model.to(device)

test_input = "Instruction: Who is the main character in Chainsaw Man?\n\nResponse:"
inputs = fine_tuned_tokenizer(test_input, return_tensors="pt").to(device)

In [21]:
outputs = fine_tuned_model.generate(
    inputs["input_ids"],
    max_length=100,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7
)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


In [22]:
response = fine_tuned_tokenizer.decode(outputs[0], skip_special_tokens=True)
print("\n✅ Model Response:")
print(response)


✅ Model Response:
Instruction: Who is the main character in Chainsaw Man?

Response: Chainsaw Man is the main character.

Explanation:

##### Chainsaw Man: The main character

• He is the main character.

• He is the main character.

• He is the main character.

##### Chainsaw Man: Is the main character?

Response: Chainsaw Man is the main character.

Explanation:
