In [1]:
pip install datasets transformers torch bitsandbytes peft trl colorama -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.9/511.9 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[?25h

# **Training Model with LoRA**

In [None]:
import os

import torch
from colorama import Fore, Style
from datasets import load_dataset
from huggingface_hub import login
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig

# Authenticate with the Hugging Face Hub without embedding the token in the
# notebook: the value is read from the HF_TOKEN environment variable. When the
# variable is unset, `login` receives None and asks for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

# Load the question/answer records from the local JSON file as the "train"
# split, then print one record in yellow as a quick look at its fields.
dataset = load_dataset("json", data_files="instructionquality.json", split="train")
print(Fore.YELLOW + str(dataset[2]) + Fore.RESET)

def format_chat_template(batch, tokenizer):
    """Render a batch of question/answer pairs as complete chat transcripts.

    Written for ``Dataset.map(..., batched=True)``: every column in ``batch``
    is a list, and the returned columns are lists of the same length.

    Args:
        batch: Mapping holding parallel lists under the keys ``"question"``
            and ``"answer"``.
        tokenizer: Object with a writable ``chat_template`` attribute and an
            ``apply_chat_template(messages, tokenize=..., add_generation_prompt=...)``
            method. Its ``chat_template`` is overwritten by this function.

    Returns:
        dict with three parallel lists: ``"instruction"`` (the questions),
        ``"response"`` (the answers) and ``"text"`` (one rendered transcript
        per pair: system prompt, user question, assistant answer).
    """
    # NOTE: the continuation lines of this literal keep their four leading
    # spaces inside the string; it is left byte-identical on purpose so the
    # rendered system message does not change.
    system_prompt = """You are a helpful, honest, and harmless assistant designed to teach Python programming to beginners.
    Explain concepts clearly and step by step. Provide working Python code examples wherever relevant.
    Focus on Python fundamentals, coding best practices, and problem-solving skills.
    If a question is outside your scope, politely advise the user that you cannot answer it.
    Think through each question logically before providing an answer."""

    questions = batch["question"]
    answers = batch["answer"]

    # The template does not depend on the row, so it is installed once per
    # batch instead of once per sample. The pieces below concatenate to the
    # same single-line template string as before.
    tokenizer.chat_template = (
        "{% set loop_messages = messages %}"
        "{% for message in loop_messages %}"
        "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}"
        "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
        "{{ content }}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
    )

    samples = []
    for question, answer in zip(questions, answers):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question},
            {"role": "assistant", "content": answer},
        ]
        # The transcript already ends with the assistant's answer, so no
        # generation prompt is requested: with add_generation_prompt=True the
        # template appends an extra, empty assistant header after the answer,
        # and every training sample would end with that dangling header.
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=False
        )
        samples.append(text)

    return {
        "instruction": questions,
        "response": answers,
        "text": samples,
    }


base_model = "meta-llama/Llama-3.2-1B-Instruct"

# The Hub token is taken from the HF_TOKEN environment variable instead of
# being pasted into the notebook. If the variable is unset this is None and
# the libraries fall back to whatever credentials are already configured.
hf_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
    token=hf_token,
)

# Add the rendered chat transcript ("text") plus "instruction"/"response"
# columns to every record, processing batches of 10 rows in 2 worker processes.
train_dataset = dataset.map(
    lambda x: format_chat_template(x, tokenizer),
    num_proc=2,
    batched=True,
    batch_size=10,
)
print(Fore.LIGHTMAGENTA_EX + str(train_dataset[0]) + Fore.RESET)

# Load the base weights in 4-bit NF4 with double quantization; computation is
# done in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    quantization_config=quant_config,
    token=hf_token,
)

# Enable gradient checkpointing, then run PEFT's preparation step for
# training on top of a k-bit quantized model.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings: rank 32 with alpha set to twice the rank, applied to
# all linear layers, with 5% dropout on the adapter.
peft_config = LoraConfig(
    r=32,  # if can more use 16, 32 or more
    lora_alpha=64,  # twice the r value
    target_modules="all-linear",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

# NOTE(review): in the recorded run, training with this configuration asked
# for a Weights & Biases API key; if that logging is not wanted, SFTConfig
# presumably accepts report_to="none" - confirm before changing.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=SFTConfig(
        output_dir="meta-llama/Llama-3.2-1B-SFT",  # local directory for checkpoints
        num_train_epochs=5,  # if possible try 50 or more
        save_steps=100,
    ),
    peft_config=peft_config,
)

trainer.train()

# Save through the trainer, and additionally save the trained model itself to
# "final_model" (the folder the merge step loads the adapter from).
trainer.save_model('complete_checkpoint')
trainer.model.save_pretrained("final_model")


Generating train split: 0 examples [00:00, ? examples/s]

[33m{'question': " In what way does Python's syntax differ from other programming languages?", 'answer': 'Python uses new lines to complete a command, unlike some other programming languages which use semicolons or parentheses. Additionally, Python relies on indentation using whitespace to define the scope of loops, functions, and classes.'}[39m


tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

Map (num_proc=2):   0%|          | 0/980 [00:00<?, ? examples/s]

[95m{'question': ' What are the applications of Python mentioned in the data?', 'answer': 'The applications of Python include web development (server-side), software development, mathematics, and system scripting.', 'instruction': ' What are the applications of Python mentioned in the data?', 'response': 'The applications of Python include web development (server-side), software development, mathematics, and system scripting.', 'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful, honest, and harmless assistant designed to teach Python programming to beginners.\n    Explain concepts clearly and step by step. Provide working Python code examples wherever relevant.\n    Focus on Python fundamentals, coding best practices, and problem-solving skills.\n    If a question is outside your scope, politely advise the user that you cannot answer it.\n    Think through each question logically before providing an answer.<|eot_id|><|start_header_id|>user<|end_h

config.json:   0%|          | 0.00/877 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

Adding EOS to train dataset:   0%|          | 0/980 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/980 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/980 [00:00<?, ? examples/s]

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnixonnyangau01[0m ([33mnixonnyangau01-universiti-teknologi-mara-official-website[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,2.6899
20,1.7143
30,0.9886
40,0.789
50,0.7605
60,0.7581
70,0.7699
80,0.7217
90,0.6897
100,0.704


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


# **Merge the LoRA adapter into the base model**




In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# The Hub token is read from the HF_TOKEN environment variable rather than
# written into the notebook; None (variable unset) leaves authentication to
# whatever credentials are already configured.
import os

hf_token = os.environ.get("HF_TOKEN")

# Base model the adapter was trained on
base_model = "meta-llama/Llama-3.2-1B-Instruct"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    trust_remote_code=True,
    token=hf_token,
)

# Load the base model in bfloat16 (not quantized) so the adapter weights can
# be merged into it.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    token=hf_token,
)

# Load adapter (LoRA checkpoint you already trained). The training cell saves
# it to the relative folder "final_model", so the same relative path is used
# here instead of an absolute /content/... path.
adapter_path = "final_model"
model = PeftModel.from_pretrained(model, adapter_path)

# Merge LoRA into base model
merged_model = model.merge_and_unload()

# Save as one folder (weights merged into base), as safetensors shards of at
# most 2GB each, together with the tokenizer files.
save_path = "ikigai_model"
merged_model.save_pretrained(save_path, safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained(save_path)

print(f"✅ Model merged and saved at {save_path}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ Model merged and saved at ikigai_model


# **Copy the tokenizer file into the merged model folder**

In [None]:
import os
import shutil
from huggingface_hub import snapshot_download

# Step 1: Fetch only the tokenizer file from the base-model repository.
# Only "original/tokenizer.model" is used below, so the snapshot is limited to
# that file with allow_patterns instead of downloading every file in the repo.
# The token comes from the HF_TOKEN environment variable (None if unset).
local_base = snapshot_download(
    "meta-llama/Llama-3.2-1B-Instruct",
    allow_patterns=["original/tokenizer.model"],
    token=os.environ.get("HF_TOKEN"),
)

# Step 2: Copy the tokenizer.model from "original" subfolder into your merged
# folder. The folder name matches the relative path the merge step saves to.
output_dir = "ikigai_model"
shutil.copy(
    os.path.join(local_base, "original", "tokenizer.model"),
    os.path.join(output_dir, "tokenizer.model"),
)

print("✅ tokenizer.model copied to ikigai_model/")


Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

✅ tokenizer.model copied to ikigai_model/


In [1]:
# Recursively (-r) pack the merged model folder into ikigai_model.zip. The
# folder is passed as an absolute path, so the archive entries are stored
# under content/ikigai_model/ (see the "adding:" lines in the output).
!zip -r ikigai_model.zip /content/ikigai_model
# Colab-specific helper module; presumably hands the archive to the browser
# as a file download - only available when running inside Google Colab.
from google.colab import files
files.download("ikigai_model.zip")

  adding: content/ikigai_model/ (stored 0%)
  adding: content/ikigai_model/model.safetensors.index.json (deflated 95%)
  adding: content/ikigai_model/special_tokens_map.json (deflated 61%)
  adding: content/ikigai_model/chat_template.jinja (deflated 71%)
  adding: content/ikigai_model/tokenizer_config.json (deflated 96%)
  adding: content/ikigai_model/model-00002-of-00002.safetensors (deflated 21%)
  adding: content/ikigai_model/tokenizer.json (deflated 85%)
  adding: content/ikigai_model/tokenizer.model (deflated 54%)
  adding: content/ikigai_model/model-00001-of-00002.safetensors (deflated 21%)
  adding: content/ikigai_model/generation_config.json (deflated 33%)
  adding: content/ikigai_model/config.json (deflated 52%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Quick inference test**

In [None]:
import torch
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

# Folder that holds the merged weights and tokenizer files
merged_model_dir = "./ikigai_model"

# Tokenizer and model both come from the merged folder; the weights are
# loaded in float16 and placed automatically across available devices.
tokenizer = AutoTokenizer.from_pretrained(merged_model_dir)
model = AutoModelForCausalLM.from_pretrained(
    merged_model_dir,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Text-generation pipeline wrapping the model/tokenizer pair loaded above
pipe = pipeline(task="text-generation", tokenizer=tokenizer, model=model)

# Questions used for the smoke test below
prompts = [
    "What are the applications of Python mentioned in the data?",
    "How would you print a string with an apostrophe using double quotes?",
    "How would you display a floating-point number with three decimal places using the format method in Python?",
    "what is python and how do i print in python?.",
]

# Test inference
def test_inference(prompt, system_prompt=None):
    """Generate one sampled reply to ``prompt`` using the chat format.

    The model was fine-tuned on chat-formatted transcripts, so the prompt is
    sent as a chat message rather than a raw string. The pipeline then applies
    the tokenizer's chat template and adds the assistant header before
    generating, instead of simply continuing the raw prompt text.

    Args:
        prompt: The user's question as plain text.
        system_prompt: Optional system message placed before the user turn.
            Defaults to None (no system message).

    Returns:
        str: The text of the generated assistant message only (the prompt is
        not echoed back).
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    # Sampling settings are unchanged: up to 256 new tokens at temperature 0.7.
    outputs = pipe(messages, max_new_tokens=256, do_sample=True, temperature=0.7)

    # For chat input, "generated_text" is the whole conversation as a list of
    # messages; the last entry is the newly generated assistant turn.
    return outputs[0]["generated_text"][-1]["content"]

# Run every sample question through the model: the prompt is printed first,
# then the generated response, then a 50-dash separator line.
separator = "-" * 50
for question in prompts:
    print(f"Prompt:\n{question}")
    reply = test_inference(question)
    print(f"Response:\n{reply}", separator, sep="\n")
