
<a href="https://colab.research.google.com/drive/10fG48rdik5HuXvT4LJ8yOD4k0PW3GpwD?usp=sharing" target="_blank">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/>
</a>

In [1]:
# Step 1: Install necessary libraries
!pip install -q unsloth "torch>=2.1" transformers accelerate peft datasets trl bitsandbytes


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.4/46.4 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.2/193.2 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m92.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m85.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Step 2: Import all modules

from unsloth import FastLanguageModel
from datasets import load_dataset, Dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import torch
import pandas as pd
import random
import os


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch SmolVLMForConditionalGeneration forward function.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Step 3: Load TinyLlama-1.1B-Chat model

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True,
)

# Enable LoRA
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing=True,
)


==((====))==  Unsloth 2025.4.1: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/762M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.4.1 patched 22 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [4]:
# Step 4: Create small instruction dataset for quick fine-tuning

questions = [
    "How to manage exam anxiety?",
    "Tips for improving self-confidence?",
    "How to cope with feelings of loneliness?",
    "Best methods to handle panic attacks?",
    "Simple mindfulness exercises for stress relief."
]

answers = [
    "Practice deep breathing, organize study plans, and take regular breaks.",
    "Celebrate small wins, practice positive affirmations, and engage in challenging tasks gradually.",
    "Join social activities, reconnect with friends, or volunteer for community work.",
    "Use the 5-4-3-2-1 grounding technique and slow breathing exercises.",
    "Try deep breathing, body scans, and mindful walking for stress relief."
]


data = []
for _ in range(500):
    q = random.choice(questions)
    a = random.choice(answers)
    data.append({"instruction": q, "input": "", "output": a})

df = pd.DataFrame(data)
mental_health_dataset = Dataset.from_pandas(df)

# Format
def format_prompt(example):
    instruction = example["instruction"]
    output = example["output"]
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{output}"
    return {"prompt": prompt, "response": output}

formatted_dataset = mental_health_dataset.map(format_prompt).remove_columns(["instruction", "input", "output"])

# Preview
formatted_dataset[0]


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

{'prompt': '### Instruction:\nHow to manage exam anxiety?\n\n### Response:\nUse the 5-4-3-2-1 grounding technique and slow breathing exercises.',
 'response': 'Use the 5-4-3-2-1 grounding technique and slow breathing exercises.'}

In [5]:
# Step 5: Fine-tune using SFTTrainer

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    dataset_text_field="prompt",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-5,
        fp16=True,
        logging_steps=10,
        output_dir="./tinyllama_mentalhealth_ollama_ready",
        optim="paged_adamw_8bit",
        save_strategy="epoch",
    ),
)

trainer.train()


Unsloth: Tokenizing ["prompt"] (num_proc=2):   0%|          | 0/500 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 2 x 1) = 4
 "-____-"     Trainable parameters = 4,505,600/4,000,000,000 (0.11% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mpomacoc217[0m ([33mpomacoc217-college[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,2.8025
20,2.6932
30,2.6398
40,2.4554
50,2.3757
60,2.2705
70,2.1755
80,2.0901
90,2.0509
100,2.0218


TrainOutput(global_step=100, training_loss=2.3575428771972655, metrics={'train_runtime': 116.7669, 'train_samples_per_second': 3.426, 'train_steps_per_second': 0.856, 'total_flos': 107974748897280.0, 'train_loss': 2.3575428771972655})

In [6]:
# Step 6: Save fine-tuned model

model.save_pretrained("tinyllama_mentalhealth_ollama")
tokenizer.save_pretrained("tinyllama_mentalhealth_ollama")


('tinyllama_mentalhealth_ollama/tokenizer_config.json',
 'tinyllama_mentalhealth_ollama/special_tokens_map.json',
 'tinyllama_mentalhealth_ollama/tokenizer.model',
 'tinyllama_mentalhealth_ollama/added_tokens.json',
 'tinyllama_mentalhealth_ollama/tokenizer.json')

In [7]:
# Step 7: Quick inference test

def generate_response(prompt, max_tokens=100):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

# Test
prompt = """### Instruction:
What are simple mindfulness exercises for stress?

### Response:"""
print("\nGenerated Response:")
print(generate_response(prompt))



Generated Response:
### Instruction:
What are simple mindfulness exercises for stress?

### Response:
Simple mindfulness exercises for stress include deep breathing, body scan meditation, and progressive muscle relaxation. These exercises help to calm the mind and reduce stress by focusing on the present moment and releasing tension in the body.


In [14]:
# Step 8: Save the fine-tuned model and tokenizer (Huggingface format)

save_path = "tinyllama_mentalhealth_ollama"

# Save model and tokenizer
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"Model and tokenizer saved successfully to '{save_path}' folder.")

Model and tokenizer saved successfully to 'tinyllama_mentalhealth_ollama' folder.
