In [6]:
import json
import os
from typing import Dict, List, Tuple, Union

import mlx.optimizers as optim
from mlx.utils import tree_flatten
from mlx_lm import load, generate
from mlx_lm.tuner import TrainingArgs, linear_to_lora_layers, train

In [3]:
# Hugging Face repo id of the base checkpoint (4-bit, per its name).
# `load` hands back the model together with its matching tokenizer.
model_path = "mlx-community/Phi-3.5-mini-instruct-4bit"
model, tokenizer = load(model_path)

Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

In [4]:
# Baseline check: ask the untuned model for a SQL query before any fine-tuning.
prompt = "generate an SQL query to find all users who registered in the last 30 days"
messages = [dict(role="user", content=prompt)]

# Render the chat as plain text (not token ids) with the generation prompt appended.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
response = generate(model, tokenizer, prompt=prompt, verbose=True)

To generate an SQL query to find all users who registered in the last 30 days, you would typically use the current date and subtract 29 days (since the count of days should include the day of registration) and compare it with the registration date in the database. Assuming the table is named `users` and has a column for the registration date, let's call it `registration_date`, and it's stored in a date or timestamp format, the SQL query would look something like this:

```sql
SELECT *
FROM users
WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days';
```

Here's a breakdown of the query:

- `SELECT *`: This selects all columns for all rows that match the criteria.
- `FROM users`: This indicates that you're querying the `users` table.
- `WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days`: This filters the rows to only include those where the `registration_date` is within the last 30 days. `CURRENT_DATE` gets today'ized date, and subtracting an interval of '30
Prompt: 20 

# Creando Adaptador

In [5]:
# Directory that holds everything belonging to the LoRA adapter.
adapter_path = "adapters"

# Both artifact paths live directly inside the adapter directory.
adapter_config_path, adapter_file_path = (
    os.path.join(adapter_path, artifact_name)
    for artifact_name in ("adapter_config.json", "adapters.safetensors")
)

# exist_ok=True keeps the cell safe to re-run.
os.makedirs(adapter_path, exist_ok=True)

# LoRA config

In [6]:
# LoRA settings: how many layers get adapters, plus the per-adapter parameters.
# The same dict is later serialized to adapter_config.json and passed to
# linear_to_lora_layers, so key names and order are kept as-is.
lora_config = dict(
    num_layers=8,
    lora_parameters=dict(rank=8, scale=20.0, dropout=0.0),
)

In [7]:
# Persist the LoRA settings next to the adapter weights as 4-space-indented JSON.
with open(adapter_config_path, "w") as config_file:
    config_file.write(json.dumps(lora_config, indent=4))

In [8]:
# Training run configuration: adapter weights are written to `adapter_file_path`,
# the run lasts 200 iterations and validation loss is evaluated every 50 of them.
# NOTE(review): grad_checkpoint presumably trades extra compute for lower peak
# memory — confirm against the mlx_lm TrainingArgs documentation.
training_args = TrainingArgs(
    adapter_file=adapter_file_path,
    iters=200,
    steps_per_eval=50,
    grad_checkpoint=True,
)

In [9]:
# Freeze the existing weights first, then convert layers to LoRA using the
# layer count and adapter parameters from `lora_config`.
model.freeze()
linear_to_lora_layers(
    model,
    lora_config["num_layers"],
    lora_config["lora_parameters"],
)

# tree_flatten yields (name, array) pairs; add up the element count of each array.
num_train_params = sum(
    weights.size
    for _name, weights in tree_flatten(model.trainable_parameters())
)
print(f"Number of trainable parameters: {num_train_params}")

# Switch to training mode; as the last expression, its result is displayed by the cell.
model.train()

Number of trainable parameters: 786432


Model(
  (model): Phi3Model(
    (embed_tokens): QuantizedEmbedding(32064, 3072, group_size=64, bits=4)
    (layers.0): TransformerBlock(
      (self_attn): Attention(
        (qkv_proj): QuantizedLinear(input_dims=3072, output_dims=9216, bias=False, group_size=64, bits=4)
        (o_proj): QuantizedLinear(input_dims=3072, output_dims=3072, bias=False, group_size=64, bits=4)
        (rope): SuScaledRoPE()
      )
      (mlp): MLP(
        (gate_up_proj): QuantizedLinear(input_dims=3072, output_dims=16384, bias=False, group_size=64, bits=4)
        (down_proj): QuantizedLinear(input_dims=8192, output_dims=3072, bias=False, group_size=64, bits=4)
      )
      (input_layernorm): RMSNorm(3072, eps=1e-05)
      (post_attention_layernorm): RMSNorm(3072, eps=1e-05)
    )
    (layers.1): TransformerBlock(
      (self_attn): Attention(
        (qkv_proj): QuantizedLinear(input_dims=3072, output_dims=9216, bias=False, group_size=64, bits=4)
        (o_proj): QuantizedLinear(input_dims=3072, out

In [10]:
class Metrics:
    """Accumulates loss values reported while training runs.

    Both histories are lists of ``(iteration, loss)`` pairs, appended in the
    order the reports arrive.
    """

    def __init__(self) -> None:
        """Start with empty training and validation histories."""
        self.train_losses: List[Tuple[int, float]] = list()
        self.val_losses: List[Tuple[int, float]] = list()

    def on_train_loss_report(self, info: Dict[str, Union[float, int]]) -> None:
        """Record the ``"train_loss"`` entry of ``info`` with its ``"iteration"``."""
        step, loss = info["iteration"], info["train_loss"]
        self.train_losses.append((step, loss))

    def on_val_loss_report(self, info: Dict[str, Union[float, int]]) -> None:
        """Record the ``"val_loss"`` entry of ``info`` with its ``"iteration"``."""
        step, loss = info["iteration"], info["val_loss"]
        self.val_losses.append((step, loss))

In [11]:
# Single collector instance; it is handed to train() as its training_callback.
metrics = Metrics()

# Load data

In [12]:
from mlx_lm.tuner.datasets import load_hf_dataset

# No dataset options are overridden, so an empty config is passed.
# NOTE(review): presumably the loader falls back to its own defaults — confirm.
config = dict()

# The loader returns the three splits in train / validation / test order.
train_set, val_set, test_set = load_hf_dataset(
    data_id="mlx-community/wikisql", tokenizer=tokenizer, config=config
)

In [13]:
# Report the size of each split, then peek at the first two test examples.
print(
    f"Test set size: {len(test_set)}",
    f"Validation set size: {len(val_set)}",
    f"Training set size: {len(train_set)}",
    f"test set: {test_set[:2]}",
    sep="\n",
)

Test set size: 100
Validation set size: 100
Training set size: 1000
test set: {'text': ["table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What is terrence ross' nationality\nA: SELECT Nationality FROM 1-10015132-16 WHERE Player = 'Terrence Ross'", "table: 1-10015132-16\ncolumns: Player, No., Nationality, Position, Years in Toronto, School/Club Team\nQ: What clu was in toronto 1995-96\nA: SELECT School/Club Team FROM 1-10015132-16 WHERE Years in Toronto = '1995-96'"]}


In [14]:
from mlx_lm.tuner.datasets import CacheDataset

# Wrap the training and validation splits before handing them to the trainer.
train_dataset, val_dataset = CacheDataset(train_set), CacheDataset(val_set)

# Fine-tune with Adam at a learning rate of 1e-5; `metrics` receives the
# training and validation loss reports through the callback hook.
train(
    model,
    optim.Adam(learning_rate=1e-5),
    train_dataset,
    val_dataset,
    args=training_args,
    training_callback=metrics,
)


Starting training..., iters: 200


Calculating loss...: 100%|██████████| 25/25 [00:39<00:00,  1.60s/it]

Iter 1: Val loss 3.253, Val took 40.003s





Iter 10: Train loss 3.317, Learning Rate 1.000e-05, It/sec 0.475, Tokens/sec 180.896, Trained Tokens 3812, Peak mem 3.131 GB
Iter 20: Train loss 2.594, Learning Rate 1.000e-05, It/sec 0.473, Tokens/sec 175.779, Trained Tokens 7531, Peak mem 3.131 GB
Iter 30: Train loss 1.959, Learning Rate 1.000e-05, It/sec 0.407, Tokens/sec 168.812, Trained Tokens 11677, Peak mem 3.131 GB
Iter 40: Train loss 1.981, Learning Rate 1.000e-05, It/sec 0.464, Tokens/sec 170.318, Trained Tokens 15347, Peak mem 3.131 GB


Calculating loss...: 100%|██████████| 25/25 [00:56<00:00,  2.24s/it]

Iter 50: Val loss 1.654, Val took 56.184s





Iter 50: Train loss 1.563, Learning Rate 1.000e-05, It/sec 0.412, Tokens/sec 162.975, Trained Tokens 19301, Peak mem 3.199 GB
Iter 60: Train loss 1.541, Learning Rate 1.000e-05, It/sec 0.413, Tokens/sec 165.119, Trained Tokens 23302, Peak mem 3.199 GB
Iter 70: Train loss 1.684, Learning Rate 1.000e-05, It/sec 0.374, Tokens/sec 146.850, Trained Tokens 27233, Peak mem 3.199 GB
Iter 80: Train loss 1.457, Learning Rate 1.000e-05, It/sec 0.292, Tokens/sec 118.472, Trained Tokens 31297, Peak mem 3.199 GB
Iter 90: Train loss 1.465, Learning Rate 1.000e-05, It/sec 0.322, Tokens/sec 130.507, Trained Tokens 35346, Peak mem 3.199 GB


Calculating loss...: 100%|██████████| 25/25 [00:49<00:00,  1.97s/it]

Iter 100: Val loss 1.419, Val took 49.254s





Iter 100: Train loss 1.346, Learning Rate 1.000e-05, It/sec 0.380, Tokens/sec 150.027, Trained Tokens 39293, Peak mem 3.199 GB
Iter 100: Saved adapter weights to adapters/adapters.safetensors and adapters/0000100_adapters.safetensors.
Iter 110: Train loss 1.546, Learning Rate 1.000e-05, It/sec 0.408, Tokens/sec 159.853, Trained Tokens 43210, Peak mem 3.199 GB
Iter 120: Train loss 1.345, Learning Rate 1.000e-05, It/sec 0.427, Tokens/sec 172.446, Trained Tokens 47253, Peak mem 3.202 GB
Iter 130: Train loss 1.356, Learning Rate 1.000e-05, It/sec 0.442, Tokens/sec 178.428, Trained Tokens 51291, Peak mem 3.202 GB
Iter 140: Train loss 1.373, Learning Rate 1.000e-05, It/sec 0.461, Tokens/sec 166.150, Trained Tokens 54897, Peak mem 3.202 GB


Calculating loss...: 100%|██████████| 25/25 [00:47<00:00,  1.92s/it]

Iter 150: Val loss 1.345, Val took 48.049s





Iter 150: Train loss 1.393, Learning Rate 1.000e-05, It/sec 0.420, Tokens/sec 163.223, Trained Tokens 58785, Peak mem 3.202 GB
Iter 160: Train loss 1.287, Learning Rate 1.000e-05, It/sec 0.380, Tokens/sec 150.868, Trained Tokens 62750, Peak mem 3.202 GB
Iter 170: Train loss 1.254, Learning Rate 1.000e-05, It/sec 0.342, Tokens/sec 132.821, Trained Tokens 66633, Peak mem 3.202 GB
Iter 180: Train loss 1.169, Learning Rate 1.000e-05, It/sec 0.263, Tokens/sec 106.740, Trained Tokens 70684, Peak mem 3.208 GB
Iter 190: Train loss 1.383, Learning Rate 1.000e-05, It/sec 0.271, Tokens/sec 106.087, Trained Tokens 74593, Peak mem 3.208 GB


Calculating loss...: 100%|██████████| 25/25 [01:07<00:00,  2.71s/it]

Iter 200: Val loss 1.314, Val took 67.973s





Iter 200: Train loss 1.287, Learning Rate 1.000e-05, It/sec 0.307, Tokens/sec 112.211, Trained Tokens 78253, Peak mem 3.208 GB
Iter 200: Saved adapter weights to adapters/adapters.safetensors and adapters/0000200_adapters.safetensors.
Saved final weights to adapters/adapters.safetensors.


## Fusionar modelo base con adaptador

In [1]:
! python -m mlx_lm fuse  --model ./phi3-mlx --adapter-path ./adapters --save-path ./new_phi3-mlx

Loading pretrained model


In [2]:
! pip install huggingface_hub
! pip install ipywidgets



# Subir modelo a HF
Utilizando la API de HF, se sube el modelo fusionado a `deimagjas/Phi-3.5-mini-instruct-4bit-sft`.

In [3]:
from huggingface_hub import login

# Authenticate this session with the Hugging Face Hub. Called without
# arguments it shows an interactive prompt, which keeps the access token out
# of the notebook source.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
from huggingface_hub import create_repo, upload_folder

# Destination repository on the Hugging Face Hub.
repo_id = "deimagjas/Phi-3.5-mini-instruct-4bit-sft"

# Make sure the destination exists before uploading. exist_ok=True turns this
# into a no-op when the repo is already there, so the cell is safe to re-run.
create_repo(repo_id, exist_ok=True)

# Upload the fused model directory; the returned CommitInfo is shown as the
# cell output.
upload_folder(
    folder_path="./new_phi3-mlx",
    repo_id=repo_id,
)


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...ning/new_phi3-mlx/model.safetensors:   6%|6         |  130MB / 2.15GB            

CommitInfo(commit_url='https://huggingface.co/deimagjas/Phi-3.5-mini-instruct-4bit-sft/commit/760b8fb8e80a39161f9f1aefb8e147e19adeba46', commit_message='Upload folder using huggingface_hub', commit_description='', oid='760b8fb8e80a39161f9f1aefb8e147e19adeba46', pr_url=None, repo_url=RepoUrl('https://huggingface.co/deimagjas/Phi-3.5-mini-instruct-4bit-sft', endpoint='https://huggingface.co', repo_type='model', repo_id='deimagjas/Phi-3.5-mini-instruct-4bit-sft'), pr_revision=None, pr_num=None)

## Test HF model

In [7]:
# Load the fused model back from the Hub to check the uploaded artifact.
# The *_sft names mark this as the fine-tuned model and its tokenizer.
model_path = "deimagjas/Phi-3.5-mini-instruct-4bit-sft"
model_sft, tokenizer_sft = load(model_path)

Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/2.15G [00:00<?, ?B/s]

In [8]:
# Same question as the baseline run, now answered by the fine-tuned model.
prompt = "generate an SQL query to find all users who registered in the last 30 days"
messages = [dict(role="user", content=prompt)]

# Render the chat as plain text (not token ids) with the generation prompt appended.
prompt = tokenizer_sft.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
response = generate(model_sft, tokenizer_sft, prompt=prompt, verbose=True)

To generate an SQL query that finds all users who have registered in the last 30 days, you'll need to use the `CURRENT_DATE` or a function equivalent in your specific SQL dialect (like `GETDATE()` in SQL Server, `CURRENT_DATE` in MySQL, or `SYSDATE()` in Oracle) to get the current date and then subtract 30 days to find the date 30 days ago. Assuming you have a table named `users` with a `registration_date` column that stores the registration date, and the date is stored in a standard format (like `YYYY-MM-DD`), the query would look something like this:


```sql

SELECT * FROM users
WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days'

```sql

-- For MySQL
SELECT * FROM users
WHERE registration_date >= CURRENT_DATE - INTERVAL 30 DAY;

```

Or

```sql

-- For PostgreSQL
SELECT * FROM users
WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days';

```


Prompt: 20 tokens, 73.271 tokens-per-sec
Generation: 256 tokens, 36.304 tokens-per-sec
Peak memory: 2.467 GB
