In [None]:
# Standard installs
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers "trl<0.9.0" peft bitsandbytes
!pip install datasets # If loading from Hugging Face Hub
!pip install --upgrade bitsandbytes
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
# Import necessary classes from transformers and peft
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-1dxx8eqr/unsloth_77aa002ae3174fbc867a9079841c49a1
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-1dxx8eqr/unsloth_77aa002ae3174fbc867a9079841c49a1
  Resolved https://github.com/unslothai/unsloth.git to commit 7a8f99e1890213cdd01a3ab6c3e13174a96e8220
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.4.1 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.4.1-py3-none-any.whl.metadata (8.0 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git

Collecting xformers
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting trl<0.9.0
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl (31.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m80.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xformers, trl
  Attempting uninstall: trl
    Found existing installation: trl 0.15.2
    Uninstalling trl-0.15.2:
      Successfully uninstalled trl-0.15.2
Successfully installed trl-0.8.6 xformers-0.0.30
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.7.0+cu126 with CUDA 1206 (you have 2.5.1+cu124)
    Python  3.11.12 (you have 3.11.11)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
from datasets import Dataset
# 1. Load Base Model
# Hub identifier of the checkpoint that is fine-tuned below; the tokenizer is
# loaded from the same identifier so vocabulary and weights stay in sync.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Weights are requested in float16 and device placement is delegated to the
# library via device_map="auto". No quantization config is passed here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# NOTE(review): trust_remote_code=True permits running code shipped with the
# hub repository -- confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# 2. Prepare Model for LoRA
# NOTE(review): this helper is named for k-bit (quantized) training, but the
# model above was loaded without quantization -- confirm the call is intended
# and check its effect on parameter dtypes / memory for a plain fp16 model.
model = prepare_model_for_kbit_training(model)

# LoRA hyper-parameters: rank 8, scaling alpha 16, 5% dropout on the adapter
# path, no bias terms trained, configured for causal language modelling.
# No target_modules are listed -- presumably peft falls back to its defaults
# for this architecture; verify which layers end up adapted.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Rebinds `model` to the adapter-wrapped model; everything below trains and
# merges this wrapped object.
model = get_peft_model(model, lora_config)

# 3. Create Simple Dummy Dataset
# Four hand-written prompts in a single "prompt" column. There is no
# response/target column, so the training text is the prompt itself.
data = {
    "prompt": [
        "I feel sad. What should I do?",
        "How can I manage anxiety?",
        "What are good habits for mental health?",
        "How can I feel more confident?"
    ]
}

# Wrap the dict in a `datasets.Dataset` so it can be mapped and fed to Trainer.
dataset = Dataset.from_dict(data)

# Tokenization
def tokenize_function(example):
    """Tokenize the ``"prompt"`` field and attach causal-LM training labels.

    Every prompt is truncated/padded to exactly 512 tokens. Labels mirror
    ``input_ids`` at real-token positions, while padding positions (where
    ``attention_mask`` is 0) are set to ``-100`` so the loss ignores them.
    Without this masking, short prompts padded to 512 tokens would make the
    loss dominated by padding.

    The attention mask is used instead of comparing against the pad token id
    because a tokenizer may reuse a real token (e.g. EOS) as its pad token.

    Args:
        example: A mapping with a ``"prompt"`` entry. It may hold a single
            string (the default, unbatched ``Dataset.map`` call) or a list of
            strings (``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output with an added ``"labels"`` entry shaped like
        ``"input_ids"``.
    """
    model_inputs = tokenizer(
        example["prompt"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    input_ids = model_inputs["input_ids"]
    attention_mask = (
        model_inputs["attention_mask"] if "attention_mask" in model_inputs else None
    )

    # No mask available: keep the previous behaviour (labels == input_ids).
    if attention_mask is None:
        model_inputs["labels"] = input_ids.copy()
        return model_inputs

    def _mask_padding(ids, mask):
        # -100 is the index that the cross-entropy loss skips.
        return [token if keep else -100 for token, keep in zip(ids, mask)]

    if input_ids and isinstance(input_ids[0], (list, tuple)):
        # Batched call: one row of ids / mask per prompt.
        model_inputs["labels"] = [
            _mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        model_inputs["labels"] = _mask_padding(input_ids, attention_mask)

    return model_inputs

# Tokenize every example; `map` is called unbatched, so `tokenize_function`
# receives one row (one prompt string) at a time.
dataset = dataset.map(tokenize_function)

# 4. Fine-Tuning Settings
training_args = TrainingArguments(
    output_dir="./tinyllama_mentalhealth_ollama",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    num_train_epochs=1,
    # With the 4-example dummy dataset, batch size 1 and 2 accumulation steps,
    # a single-device epoch is only ~2 optimizer steps. The previous value of
    # 5 was never reached, so no training loss was ever reported.
    logging_steps=1,
    save_steps=10,
    fp16=True,
    save_total_limit=1,
    # Disable implicit experiment-tracker integrations. Otherwise an installed
    # wandb package makes `trainer.train()` stop and ask for an API key, which
    # breaks unattended "Restart & Run All" execution.
    report_to="none",
)

# NOTE(review): recent transformers releases appear to warn about the
# `tokenizer=` argument here; it is kept because the installed version is not
# pinned -- confirm before switching to a newer argument name.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

# 5. Train Model
trainer.train()

# 6. Merge and Save Properly
from peft import PeftModel

# Fold the trained LoRA adapter into the base weights, then write the merged
# model and the tokenizer side by side into the same export directory.
export_dir = "./tinyllama_export_ollama_ready"
merged_model = model.merge_and_unload()
merged_model.save_pretrained(export_dir)
tokenizer.save_pretrained(export_dir)

print("✅ Model is now cleanly saved and ready for Ollama export.")

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjayanth-kalyanam[0m ([33mjayanth-kalyanam-san-jose-state-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss


✅ Model is now cleanly saved and ready for Ollama export.
