In [1]:
# Install required libraries
!pip install transformers datasets trl torch accelerate bitsandbytes wandb

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading trl-0.23.0-py3-none-any.whl (564 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m36.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.47.0 trl-0.23.0


In [2]:
# Import necessary modules
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, pipeline
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
import torch
import os

# Detect device (GPU, MPS, or CPU)
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

***Quick Check For Resources - Runtime***

In [3]:
!nvidia-smi

Wed Sep 10 22:03:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   62C    P8             13W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

tokenizer.chat_template = None

from trl.models.utils import setup_chat_format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

prompt = "Explain AGI?"
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
print(pipe(prompt, max_new_tokens=200))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

Device set to use cuda:0


[{'generated_text': 'Explain AGI?'}]


In [5]:
# Load the dataset
ds = load_dataset("prithivMLmods/Deepthink-Reasoning")

# Tokenization function with batch handling
def tokenize_function(examples):
    # Apply strip to each string in the batch
    prompts = [p.strip() for p in examples["prompt"]]
    responses = [r.strip() for r in examples["response"]]

    # Apply chat template for each prompt-response pair
    texts = [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": p}, {"role": "assistant", "content": r}],
            tokenize=False
        )
        for p, r in zip(prompts, responses)
    ]

    # Tokenize the combined texts
    return tokenizer(texts, truncation=True, padding="max_length", max_length=512)

# Apply the tokenization function to the dataset
ds = ds.map(tokenize_function, batched=True)

README.md: 0.00B [00:00, ?B/s]

dataset/0000.parquet:   0%|          | 0.00/401k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/251 [00:00<?, ? examples/s]

Map:   0%|          | 0/251 [00:00<?, ? examples/s]

In [8]:
use_bf16 = torch.cuda.is_bf16_supported()

# Prepare training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=70,  # Adjust max_steps as needed
    learning_rate=5e-5,
    fp16=not use_bf16,  # Use fp16 if bf16 is not supported
    bf16=use_bf16,  # Use bf16 if supported
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",  # Use 'wandb' or 'tensorboard' if needed or pass 'none'
)

# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,  # Use tokenizer as processing_class
    train_dataset=ds["train"],  # Train dataset
    args=training_args,  # Pass the training arguments
)

In [9]:
# Start training
trainer.train()

Step,Training Loss
1,0.7456
2,1.0091
3,0.7626
4,0.8466
5,0.9813
6,0.7913
7,0.909
8,0.7654
9,0.945
10,0.642


TrainOutput(global_step=70, training_loss=0.7458553267376763, metrics={'train_runtime': 160.7079, 'train_samples_per_second': 3.485, 'train_steps_per_second': 0.436, 'total_flos': 179441359257600.0, 'train_loss': 0.7458553267376763, 'epoch': 2.1904761904761907})

In [10]:
# Save model and tokenizer to a local directory
save_directory = "/content/my_model"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

# Zip the directory
import shutil
shutil.make_archive(save_directory, 'zip', save_directory)

# Download the zip file
from google.colab import files
files.download(f"{save_directory}.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>