In [1]:
!pip install transformers datasets accelerate evaluate trl

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Collecting trl
  Downloading trl-0.21.0-py3-none-any.whl.metadata (11 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting transformers
  Downloading transformers-4.55.2-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.24.0 (from datasets)
  Downloading huggingface_hub-0.34.4-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 

In [20]:
#Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
import torch
import pandas as pd
import numpy as np
from trl import SFTConfig, SFTTrainer, setup_chat_format
from IPython.core.display import display, HTML

In [21]:
#Check if GPU is available
device = ("cuda"
          if torch.cuda.is_available()
         else "cpu"
         )
torch.cuda.is_available()

True

In [22]:
#Load the model and tokenizer
model_name = "HuggingFaceTB/SmolLM2-135M"
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [23]:
#Setting up the chat format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

In [24]:
prompt = "Write a haiku about programming"

messages = [{"role": "user", "content": prompt}]
formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)

inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=150)
print("Before Training")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Before Training
user
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a haiku about programming
Write a ha


In [25]:
#Display the dataset
display(
    HTML(
        """<iframe
  src="https://huggingface.co/datasets/HuggingFaceTB/smoltalk/embed/viewer/all/train"
  frameborder="0"
  width="100%"
  height="560px"
></iframe>"""
    )
)

In [27]:
#load dataset
dataset = load_dataset("HuggingFaceTB/smoltalk", "everyday-conversations")
df = pd.DataFrame(dataset["train"])
df

data/everyday-conversations/train-00000-(…):   0%|          | 0.00/946k [00:00<?, ?B/s]

data/everyday-conversations/test-00000-o(…):   0%|          | 0.00/52.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2260 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/119 [00:00<?, ? examples/s]

Unnamed: 0,full_topic,messages
0,Travel/Vacation destinations/Beach resorts,"[{'content': 'Hi there', 'role': 'user'}, {'co..."
1,Work/Career development/Mentorship,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
2,Shopping/Window shopping/Window shopping etiqu...,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
3,Cooking/Cooking for others/Food gifting,"[{'content': 'Hi there', 'role': 'user'}, {'co..."
4,Weather/Climate change/Climate change impacts,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
...,...,...
2255,Cooking/Cooking shows/Kids' cooking shows,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
2256,Music/Musical instruments/Learning an instrument,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
2257,basic math concepts/addition and subtraction,"[{'content': 'Hi', 'role': 'user'}, {'content'..."
2258,Pets/Veterinary visits/Specialized treatments,"[{'content': 'Hi there', 'role': 'user'}, {'co..."


In [39]:
def tokenization_function(row):
    text = tokenizer.apply_chat_template(
        row["messages"],
        tokenize=False,
        add_generation_prompt=False
    )
    return tokenizer(
        text,
        truncation=True,
        padding="max_length"  
    )

tokenized_dataset = dataset.map(tokenization_function, batched=True)


Map:   0%|          | 0/2260 [00:00<?, ? examples/s]

Map:   0%|          | 0/119 [00:00<?, ? examples/s]

In [41]:
print(tokenized_dataset["train"][10])

{'full_topic': 'Health/Hygiene/Household cleaning', 'messages': [{'content': 'Hi there', 'role': 'user'}, {'content': 'Hello! How can I help you today?', 'role': 'assistant'}, {'content': "I'm looking for ways to keep my home clean and germ-free. What's the best way to sanitize my kitchen counters?", 'role': 'user'}, {'content': 'To sanitize your kitchen counters, mix 1 tablespoon of unscented bleach with 1 quart of water, and use a soft cloth to wipe down the surfaces.', 'role': 'assistant'}, {'content': 'That sounds easy. What about the bathroom? How often should I clean the shower and bathtub?', 'role': 'user'}, {'content': "It's recommended to clean the shower and bathtub at least once a week, using a gentle cleanser and a scrub brush to remove soap scum and mildew.", 'role': 'assistant'}, {'content': "Okay, I'll add that to my cleaning schedule. Do you have any tips for keeping my trash cans clean and odor-free?", 'role': 'user'}, {'content': 'Yes, you can sprinkle some baking sod

In [None]:
sft_config = SFTConfig(
    output_dir="./results",
    max_steps = 800,
    per_device_train_batch_size=2,
    learning_rate=2e-5,
    save_steps=100
)