In [4]:
## LOCAL
# Run the shared `__include.ipynb` notebook before anything else in this notebook.
# NOTE(review): presumably this provides the environment/configuration that the
# Langfuse client created below relies on — confirm against __include.ipynb.
%run __include.ipynb

In [5]:
from langfuse import Langfuse

# Langfuse client; used further down to fetch the "sms_phishing_train" and
# "sms_phishing_val" datasets.
# No credentials or host are passed here, so the client has to obtain its
# configuration from elsewhere (NOTE(review): presumably environment variables
# prepared by the include notebook — confirm).
langfuse = Langfuse()

In [202]:
from jinja2 import Template

# System prompt shared by every training example: it describes the input field
# (`sms_text`), the output field (`category`) and the three allowed labels.
# The `{sms_text}` and `{category}` markers use SINGLE braces, which Jinja does
# not treat as expressions, so they remain in the rendered prompt as literal
# placeholder text (they are never replaced by the actual SMS or label).
# NOTE(review): the layout looks like an auto-generated structured prompt
# (e.g. from DSPy) — confirm it matches the prompt used at inference time.
system_content_str = """
    Your input fields are:
    1. `sms_text` (str)

    Your output fields are:
    1. `category` (Literal['ham', 'spam', 'smishing'])

    All interactions will be structured in the following way, with the appropriate values filled in.

    Inputs will have the following structure:

    [[ ## sms_text ## ]]
    {sms_text}

    Outputs will be a JSON object with the following fields.

    {
      "category": "{category}        # note: the value you produce must be one of: ham; spam; smishing"
    }

    In adhering to this structure, your objective is:
            Given an SMS text, predict whether it is ham, spam, or smishing.
            Output only the predicted label.
        """

# system_content_str = "Given an SMS text, predict whether it is ham, spam, or smishing.Output only the predicted label."

# User turn of each training example (Jinja template; `sms_text` is the message
# to classify).
# The field marker must be written exactly as the system prompt announces it,
# i.e. `[[ ## sms_text ## ]]` with two opening brackets. It was previously
# `[ ## sms_text ## ]]`, so the fine-tuning data disagreed with the declared
# input structure.
user_content_str = """
    [[ ## sms_text ## ]]
    {{ sms_text }}

    Respond with a JSON object in the following order of fields: `category` (must be formatted as a valid Python Literal['ham', 'spam', 'smishing']).
"""

# Assistant turn of each training example (Jinja template): a fenced JSON block
# holding the expected label. `tojson` emits the label as a JSON value
# (a quoted string for text labels), so the fenced block is itself valid JSON.
predict_str = """
```json
{
  "category": {{ predicted_label | tojson }}
}
```
"""


# Chat-format training record (Jinja template producing one JSON document with
# system / user / assistant messages).
# Each message body is passed through `tojson`, which JSON-encodes the already
# rendered prompt text (quotes, newlines, backslashes), so the rendered record
# can be parsed with `json.loads`.
template_chat_str = """
{
  "messages": [
    {
      "role": "system",
      "content": {{ system_prompt | tojson }}
    },
    {
      "role": "user",
      "content": {{ user_prompt | tojson}}
    },
    {
      "role": "assistant",
      "content": {{ predict | tojson }}
    }
  ]
}
"""

# Prompt/completion variant of a training record (Jinja template producing one
# JSON object with "prompt" and "completion").
# Both values are emitted through `tojson` instead of being pasted between
# hand-written quotes: an SMS containing `"`, `\` or a line break would
# otherwise render to invalid JSON. The instruction text is concatenated with
# the message (`~`) before encoding, so the decoded prompt is the same text as
# before. The instruction wording is deliberately left untouched.
template_complete_str = """
{
  "prompt": {{ ("Given an SMS text, predict whether it is ham, spam, or smishing.Output only the predicted label: " ~ sms_text) | tojson }},
  "completion": {{ predicted_label | tojson }}
}
"""

In [203]:
import json

# Compile the Jinja templates once; the validation export cell reuses these
# objects.
system_template = Template(system_content_str)
user_template = Template(user_content_str)
predict_template = Template(predict_str)
template_chat = Template(template_chat_str)
dataset = langfuse.get_dataset("sms_phishing_train")
print(template_chat_str)

# The system prompt contains no Jinja expressions, so it is the same for every
# example: render it once, and do not hand it the expected label.
train_system_prompt = system_template.render()

# One rendered chat record (a JSON document as text) per dataset item.
data = [
    template_chat.render(
        system_prompt=train_system_prompt,
        user_prompt=user_template.render(sms_text=item.input),
        predict=predict_template.render(predicted_label=item.expected_output),
    )
    for item in dataset.items
]

# Write JSON Lines: each record is parsed and re-serialised so it ends up on a
# single line. Records that fail to parse are still skipped (as before), but
# they are now counted and reported instead of disappearing silently.
invalid_count = 0
first_error = None
with open("../../mlx/data/train.jsonl", "w") as f:
    for record in data:
        try:
            parsed = json.loads(record)
        except json.JSONDecodeError as err:
            invalid_count += 1
            if first_error is None:
                first_error = err
            continue
        f.write(json.dumps(parsed) + "\n")

if invalid_count:
    print(
        f"WARNING: skipped {invalid_count} of {len(data)} train records "
        f"that were not valid JSON (first error: {first_error})"
    )


{
  "messages": [
    {
      "role": "system",
      "content": {{ system_prompt | tojson }}
    },
    {
      "role": "user",
      "content": {{ user_prompt | tojson}}
    },
    {
      "role": "assistant",
      "content": {{ predict | tojson }}
    }
  ]
}



In [204]:
# Export the validation split in the same chat format as the training split.
# Relies on `json` and on the compiled templates (`system_template`,
# `user_template`, `predict_template`, `template_chat`) from the training
# export cell.
dataset = langfuse.get_dataset("sms_phishing_val")

# The system prompt contains no Jinja expressions, so it is the same for every
# example: render it once, and do not hand it the expected label.
valid_system_prompt = system_template.render()

# One rendered chat record (a JSON document as text) per dataset item.
data = [
    template_chat.render(
        system_prompt=valid_system_prompt,
        user_prompt=user_template.render(sms_text=item.input),
        predict=predict_template.render(predicted_label=item.expected_output),
    )
    for item in dataset.items
]

# Write JSON Lines: each record is parsed and re-serialised so it ends up on a
# single line. Records that fail to parse are still skipped (as before), but
# they are now counted and reported instead of disappearing silently.
invalid_count = 0
first_error = None
with open("../../mlx/data/valid.jsonl", "w") as f:
    for record in data:
        try:
            parsed = json.loads(record)
        except json.JSONDecodeError as err:
            invalid_count += 1
            if first_error is None:
                first_error = err
            continue
        f.write(json.dumps(parsed) + "\n")

if invalid_count:
    print(
        f"WARNING: skipped {invalid_count} of {len(data)} validation records "
        f"that were not valid JSON (first error: {first_error})"
    )

In [2]:
# Reference only: print the command-line options of the mlx_lm LoRA
# fine-tuning tool. Nothing is trained by this cell.
! mlx_lm.lora --help

usage: mlx_lm.lora [-h] [--model MODEL] [--train] [--data DATA]
                   [--fine-tune-type {lora,dora,full}]
                   [--optimizer {adam,adamw}] [--mask-prompt]
                   [--num-layers NUM_LAYERS] [--batch-size BATCH_SIZE]
                   [--iters ITERS] [--val-batches VAL_BATCHES]
                   [--learning-rate LEARNING_RATE]
                   [--steps-per-report STEPS_PER_REPORT]
                   [--steps-per-eval STEPS_PER_EVAL]
                   [--resume-adapter-file RESUME_ADAPTER_FILE]
                   [--adapter-path ADAPTER_PATH] [--save-every SAVE_EVERY]
                   [--test] [--test-batches TEST_BATCHES]
                   [--max-seq-length MAX_SEQ_LENGTH] [-c CONFIG]
                   [--grad-checkpoint] [--seed SEED]

LoRA or QLoRA finetuning.

options:
  -h, --help            show this help message and exit
  --model MODEL         The path to the local model directory or Hugging Face
                    

In [24]:
# Reference only: print the command-line options of the mlx_lm tool that
# fuses fine-tuned adapters into a base model. Nothing is fused by this cell.
! mlx_lm.fuse --help

Loading pretrained model
usage: mlx_lm.fuse [-h] [--model MODEL] [--save-path SAVE_PATH]
                   [--adapter-path ADAPTER_PATH] [--hf-path HF_PATH]
                   [--upload-repo UPLOAD_REPO] [--de-quantize] [--export-gguf]
                   [--gguf-path GGUF_PATH]

Fuse fine-tuned adapters into the base model.

options:
  -h, --help            show this help message and exit
  --model MODEL         The path to the local model directory or Hugging Face
                        repo.
  --save-path SAVE_PATH
                        The path to save the fused model.
  --adapter-path ADAPTER_PATH
                        Path to the trained adapter weights and config.
  --hf-path HF_PATH     Path to the original Hugging Face model. Required for
                        upload if --model is a local directory.
  --upload-repo UPLOAD_REPO
                        The Hugging Face repo to upload the model to.
  --de-quantize         Generate a de-quantized model.

In [217]:
# Fuse the adapters stored in ../../mlx/adapters-merge into the
# mlx-community/gemma-3-1b-it-bf16 base model and save the fused model to
# ../../mlx/models/gemma-3-1b-it-bf16-ft.
!  mlx_lm.fuse --model mlx-community/gemma-3-1b-it-bf16 --adapter-path ../../mlx/adapters-merge --save-path ../../mlx/models/gemma-3-1b-it-bf16-ft

Loading pretrained model
Fetching 9 files: 100%|████████████████████████| 9/9 [00:00<00:00, 34223.70it/s]


In [196]:
# Fuse the adapters stored in ../../mlx/adapters into the
# mlx-community/gemma-3-4b-it-bf16 base model and save the fused model to
# ../../mlx/models/gemma-3-4b-it-16bit-ft-8-8-5k.
# NOTE(review): this uses a different adapter directory (adapters vs
# adapters-merge) than the 1b fuse cell — confirm each directory holds the
# adapters trained for the corresponding base model.
!  mlx_lm.fuse --model mlx-community/gemma-3-4b-it-bf16 --adapter-path ../../mlx/adapters --save-path ../../mlx/models/gemma-3-4b-it-16bit-ft-8-8-5k

Loading pretrained model
Fetching 11 files: 100%|█████████████████████| 11/11 [00:00<00:00, 54729.95it/s]


In [52]:
# Convert a fused model directory into a single GGUF file.
# NOTE(review): the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'numpy'" raised from
# /opt/homebrew/bin/convert_hf_to_gguf.py, i.e. the Python interpreter that
# executed the script has no numpy installed (presumably not the notebook's
# kernel environment — confirm and run the script with an interpreter that has
# its requirements).
# NOTE(review): the input directory `models/gemma-3-1b-it-16bit-ft-8-8-5k`
# matches neither --save-path used by the mlx_lm.fuse cells in this notebook
# (`../../mlx/models/gemma-3-1b-it-bf16-ft`,
# `../../mlx/models/gemma-3-4b-it-16bit-ft-8-8-5k`) and is relative to a
# different working directory — confirm the intended model directory.
! convert_hf_to_gguf.py models/gemma-3-1b-it-16bit-ft-8-8-5k  --outfile models/gguf/gemma-3-1b-it-bf16-ft-8-8-5k.gguf

Traceback (most recent call last):
  File "/opt/homebrew/bin/convert_hf_to_gguf.py", line 21, in <module>
    import numpy as np
ModuleNotFoundError: No module named 'numpy'


In [None]:
!  ollama create gemmma3:1b-it-bf16_ft -f Modelfile