In [5]:
# Get necessary libs
import os
import mlx_lm
import torch
import langchain

## HuggingFace Model Loading and Generation

In [None]:
# Log in to Hugging Face with an API key and download the pre-trained LLM.
import huggingface_hub
from huggingface_hub import hf_hub_download

MODEL_ID = "mlx-community/Llama-3.2-3B-Instruct-4bit"
# The token is read from the environment so it is never hard-coded in the notebook.
HUGGING_FACE_API_KEY = os.environ.get("HUGGING_FACE_API_KEY")

if HUGGING_FACE_API_KEY:
    huggingface_hub.login(token=HUGGING_FACE_API_KEY)
else:
    # Calling login() without a token falls back to an interactive prompt, which
    # stalls "Restart & Run All". Skip the login instead and rely on previously
    # cached credentials (or anonymous access for public repositories).
    print(
        "HUGGING_FACE_API_KEY is not set; skipping login "
        "(using cached credentials or anonymous access)."
    )

# Download the pre-trained LLM snapshot; the call's return value is shown as the cell output.
huggingface_hub.snapshot_download(repo_id=MODEL_ID)

In [None]:
# Connectivity check: verify that the model can be used offline:
# Step 1: Turn Off Wifi
# Step 2: Run Cell and Test LLM Outputs
# Step 3: Turn On Wifi and go on

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    BitsAndBytesConfig,
)

# Request 4-bit loading (NF4 with double quantization) to keep memory usage low.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Tokenizer and causal-LM weights both come from the same MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
)

# Wrap the model in a text-generation pipeline and expose it to LangChain.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=128,
)
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Ask the local LLM a question through a LangChain prompt template.
# The single template variable "name" receives the question text.
prompt = langchain.PromptTemplate(
    template="Can you answer this question: '''{name}'''",
    input_variables=["name"],
)
chain = langchain.LLMChain(llm=local_llm, prompt=prompt)
# Left as the last expression so the answer is rendered as the cell output.
chain.run("What are competitors to Apache Kafka?")

## Dataset Preparation

In [None]:
# Download the text-to-SQL dataset repository from the Hugging Face Hub
# into a local directory.
dataset_name = "gretelai/synthetic_text_to_sql"
save_dir = "./data/synthetic_text_to_sql/"

# Left as the last expression so the call's return value is shown as the cell output.
huggingface_hub.snapshot_download(
    local_dir=save_dir,
    repo_type="dataset",
    repo_id=dataset_name,
)

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

llm_as_a_judge_rubric.txt:   0%|          | 0.00/3.45k [00:00<?, ?B/s]

bmc2_llm_judge_example_1.txt:   0%|          | 0.00/1.75k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/2.31k [00:00<?, ?B/s]

dalle_prompt.txt:   0%|          | 0.00/782 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.18k [00:00<?, ?B/s]

bmc2_llm_judge_example_2.txt:   0%|          | 0.00/2.43k [00:00<?, ?B/s]

(…)nthetic_text_to_sql_train.snappy.parquet:   0%|          | 0.00/32.4M [00:00<?, ?B/s]

(…)ynthetic_text_to_sql_test.snappy.parquet:   0%|          | 0.00/1.90M [00:00<?, ?B/s]

'/Users/arda/Documents/llama-finetune/data'

In [None]:
# Helpers from the project-local data.prepare module (not shown in this notebook).
from data.prepare import (
    prepare_train,
    prepare_test_valid,
)

# Prepare the training split first, then the test/validation splits.
prepare_train()
prepare_test_valid()

                                              prompt  \
0  What is the total volume of timber sold by eac...   
1  List all the unique equipment types and their ...   
2  How many marine species are found in the South...   
3  What is the total trade value and average pric...   
4  Find the energy efficiency upgrades with the h...   
5  What is the total spending on humanitarian ass...   
6  What is the average water temperature for each...   
7  Delete a program's outcome data with given SQL...   
8  Find the total fare collected from passengers ...   
9  What is the average property size in inclusive...   

                                          completion  
0  SQL: SELECT salesperson_id, name, SUM(volume) ...  
1  SQL: SELECT equipment_type, SUM(maintenance_fr...  
2  SQL: SELECT COUNT(*) FROM marine_species WHERE...  
3  SQL: SELECT trader_id, stock, SUM(price * quan...  
4  SQL: SELECT type, cost FROM (SELECT type, cost...  
5  SQL: SELECT SUM(spending) FROM defense.eu_huma... 

## MLX Lib Model Loading-Training-Testing

### Model Load

In [None]:
# Load the model and its tokenizer through the mlx_lm library.
# This checkpoint is already quantized, so no separate quantization step is needed.
MODEL_ID = "mlx-community/Llama-3.2-3B-Instruct-4bit"
model, tokenizer = mlx_lm.load(MODEL_ID)

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

### Generate

In [4]:
# Default template: format the question with the tokenizer's built-in chat template.
user_content = "What are competitors to Apache Kafka?"

if hasattr(tokenizer, "apply_chat_template") and getattr(tokenizer, "chat_template", None) is not None:
    messages = [{"role": "user", "content": user_content}]
    # tokenize=False returns the formatted prompt as text;
    # add_generation_prompt=True appends the header that opens the assistant turn.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)
else:
    # No chat template available: fall back to the raw question so `prompt` is
    # always defined by this cell instead of being undefined or silently reusing
    # a stale `prompt` left over from an earlier cell.
    prompt = user_content

response = mlx_lm.generate(model, tokenizer, prompt=prompt, verbose=True)


<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 17 Feb 2025

<|eot_id|><|start_header_id|>user<|end_header_id|>

What are competitors to Apache Kafka?<|eot_id|><|start_header_id|>assistant<|end_header_id|>


There are several competitors to Apache Kafka in the market. Here are some of the notable ones:

1. **Amazon Kinesis**: Amazon Kinesis is a fast, reliable, and scalable platform for real-time data processing and analysis. It's designed to handle large volumes of data from various sources and provides features like data streaming, processing, and analytics.

2. **Google Cloud Pub/Sub**: Google Cloud Pub/Sub is a messaging system that allows publishers to send messages to subscribers. It's designed to handle large volumes of data and provides features like message queuing, routing, and transformation.

3. **Microsoft Azure Event Grid**: Azure Event Grid is a cloud-based event bus that allows publishers to send events to s

In [None]:
# Custom Template
def custom_chat_template(messages):
    """Render chat messages into a custom LLaMA 3.2 style prompt string.

    Args:
        messages: iterable of dicts with a "role" key and, for the roles
            "system", "user" and "assistant", a "content" key. Messages with
            any other role are skipped.

    Returns:
        The prompt text: the begin-of-text marker, one header/content/end-of-turn
        segment per recognised message (each followed by a newline), and a
        trailing assistant header that leaves the next turn open for the model.
    """
    def render_turn(turn):
        """Format a single message, or return "" when its role is not recognised."""
        speaker = turn["role"]
        if speaker not in ("system", "user", "assistant"):
            return ""
        # System turns carry an extra blank line before the end-of-turn marker.
        closing = "\n\n<|eot_id|>\n" if speaker == "system" else "<|eot_id|>\n"
        return f"<|start_header_id|>{speaker}<|end_header_id|>\n\n{turn['content']}{closing}"

    body = "".join(render_turn(turn) for turn in messages)
    return f"<|begin_of_text|>{body}<|start_header_id|>assistant<|end_header_id|>"

# Conversation to render: a system message (knowledge cutoff, current date and
# the assistant's role) followed by the user's question.
messages = [
    {
        "role": "system",
        "content": (
            "Cutting Knowledge Date: December 2023\n"
            "Today Date: 13 Feb 2025\n"
            "You are the support assistant for questions that are asked to you."
        ),
    },
    {
        "role": "user",
        "content": "What are competitors to Apache Kafka?",
    },
]

# Render with the custom template and show the exact text that will be sent to the model.
prompt = custom_chat_template(messages)
print(prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 13 Feb 2025
You are the support assistant for questions that are asked to you.

<|eot_id|>
<|start_header_id|>user<|end_header_id|>

What are competitors to Apache Kafka?<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>


In [6]:
# Generate a completion for the current `prompt` with the MLX model and tokenizer.
response = mlx_lm.generate(
    model,
    tokenizer,
    prompt=prompt,
    verbose=True,
)



Apache Kafka has several competitors in the market. Some of the notable ones are:

1. Amazon Kinesis: Amazon Kinesis is a fast, reliable, and scalable stream processing service offered by Amazon Web Services (AWS). It's designed to handle large amounts of data in real-time and provides features like data processing, event-driven architecture, and real-time analytics.

2. RabbitMQ: RabbitMQ is a message broker that supports multiple messaging patterns, including pub/sub, request/reply, and message queues. It's known for its high performance, reliability, and scalability.

3. Apache Storm: Apache Storm is a distributed real-time computation system that can handle large amounts of data in real-time. It's designed to handle high-throughput and provides features like fault-tolerant, scalable, and real-time processing.

4. Google Cloud Pub/Sub: Google Cloud Pub/Sub is a messaging service offered by Google Cloud Platform. It's designed to handle large amounts of data in real-time and provid

### Model Quantization 

In [None]:
# Import the mlx_lm.convert submodule (bound to the name `convert`) and print
# its help text to see which entry points it exposes.
import mlx_lm.convert as convert
help(convert)

Help on module mlx_lm.convert in mlx_lm:

NAME
    mlx_lm.convert - # Copyright © 2023-2024 Apple Inc.

FUNCTIONS
    configure_parser() -> argparse.ArgumentParser
        Configures and returns the argument parser for the script.

        Returns:
            argparse.ArgumentParser: Configured argument parser.

    main()

FILE
    /Users/arda/Documents/llama-finetune/myenv/lib/python3.12/site-packages/mlx_lm/convert.py




In [37]:
# Quantize a Hugging Face model with the mlx_lm.convert entry point.
# The model loaded earlier is already quantized, so a different Hugging Face
# model is quantized here instead.
import argparse
import sys

args = argparse.Namespace(
    hf_path="meta-llama/Llama-3.1-8B-Instruct",
    q_bits=8,
)

# convert.main() takes no parameters, so the options are handed over through
# sys.argv in command-line form. The kernel's original argv is saved and
# restored afterwards so the override does not leak into later cells, even if
# the conversion fails.
_saved_argv = sys.argv
sys.argv = [
    "convert.py",
    "--hf-path", str(args.hf_path),
    "--q-bits", str(args.q_bits),
    "--quantize",
]
try:
    convert.main()
finally:
    sys.argv = _saved_argv

[INFO] Loading


Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

[INFO] Quantizing
[INFO] Quantized model with 8.500 bits per weight.


### Fine Tune - QLORA 

In [13]:
# Print the mlx_lm package overview (package contents and version), then import
# the names used by the fine-tuning cell.
help(mlx_lm)
import argparse
from mlx_lm import lora

Help on package mlx_lm:

NAME
    mlx_lm - # Copyright © 2023-2024 Apple Inc.

PACKAGE CONTENTS
    _version
    cache_prompt
    chat
    convert
    evaluate
    fuse
    generate
    gguf
    lora
    manage
    merge
    models (package)
    sample_utils
    server
    tokenizer_utils
    tuner (package)
    utils

VERSION
    0.21.2

FILE
    /Users/arda/Documents/llama-finetune/myenv/lib/python3.12/site-packages/mlx_lm/__init__.py




In [None]:
# QLoRA fine-tuning.
# The base model is already quantized, so LoRA adapters are trained on it
# directly without an additional quantization step.
import argparse
import sys

# Training configuration. `test` and `resume_adapter_file` are recorded here
# but are not forwarded on the command line below.
args = argparse.Namespace(
    model=MODEL_ID,
    data="./data/",
    train=True,
    test=False,
    batch_size=2,
    num_layers=16,
    iters=1000,
    fine_tune_type="lora",
    resume_adapter_file=None,
    adapter_path="adapters_llama_3b",
    seed=42,
)

# lora.main() is called without parameters, so the options are handed over
# through sys.argv in command-line form. The kernel's original argv is saved
# and restored afterwards so the override does not leak into later cells, even
# if training is interrupted or fails.
_saved_argv = sys.argv
sys.argv = [
    "lora.py",
    "--model", str(args.model),
    "--data", str(args.data),
    "--train",
    "--batch-size", str(args.batch_size),
    "--num-layers", str(args.num_layers),
    "--iters", str(args.iters),
    "--fine-tune-type", str(args.fine_tune_type),
    "--adapter-path", str(args.adapter_path),
    "--seed", str(args.seed),
]
try:
    lora.main()
finally:
    sys.argv = _saved_argv

Loading pretrained model


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Loading datasets
Training
Trainable parameters: 0.071% (2.294M/3212.750M)
Starting training..., iters: 1000
Iter 1: Val loss 2.246, Val took 21.200s
Iter 10: Train loss 2.166, Learning Rate 1.000e-05, It/sec 0.478, Tokens/sec 210.051, Trained Tokens 4394, Peak mem 10.933 GB
Iter 20: Train loss 1.593, Learning Rate 1.000e-05, It/sec 0.552, Tokens/sec 220.859, Trained Tokens 8392, Peak mem 10.933 GB
Iter 30: Train loss 1.168, Learning Rate 1.000e-05, It/sec 0.559, Tokens/sec 224.946, Trained Tokens 12414, Peak mem 10.933 GB
Iter 40: Train loss 0.922, Learning Rate 1.000e-05, It/sec 0.466, Tokens/sec 226.470, Trained Tokens 17272, Peak mem 11.278 GB
Iter 50: Train loss 0.760, Learning Rate 1.000e-05, It/sec 0.538, Tokens/sec 235.703, Trained Tokens 21652, Peak mem 11.278 GB
Iter 60: Train loss 0.688, Learning Rate 1.000e-05, It/sec 0.471, Tokens/sec 233.109, Trained Tokens 26597, Peak mem 11.278 GB
Iter 70: Train loss 0.764, Learning Rate 1.000e-05, It/sec 0.584, Tokens/sec 250.598, Train

In [None]:
"""
MODEL_ID="mlx-community/Llama-3.2-3B-Instruct-4bit"

Loading datasets
Training
Trainable parameters: 0.071% (2.294M/3212.750M)
Starting training..., iters: 1000
Iter 1: Val loss 2.246, Val took 21.200s
Iter 10: Train loss 2.166, Learning Rate 1.000e-05, It/sec 0.478, Tokens/sec 210.051, Trained Tokens 4394, Peak mem 10.933 GB
Iter 20: Train loss 1.593, Learning Rate 1.000e-05, It/sec 0.552, Tokens/sec 220.859, Trained Tokens 8392, Peak mem 10.933 GB
Iter 30: Train loss 1.168, Learning Rate 1.000e-05, It/sec 0.559, Tokens/sec 224.946, Trained Tokens 12414, Peak mem 10.933 GB
Iter 40: Train loss 0.922, Learning Rate 1.000e-05, It/sec 0.466, Tokens/sec 226.470, Trained Tokens 17272, Peak mem 11.278 GB
Iter 50: Train loss 0.760, Learning Rate 1.000e-05, It/sec 0.538, Tokens/sec 235.703, Trained Tokens 21652, Peak mem 11.278 GB
Iter 60: Train loss 0.688, Learning Rate 1.000e-05, It/sec 0.471, Tokens/sec 233.109, Trained Tokens 26597, Peak mem 11.278 GB
Iter 70: Train loss 0.764, Learning Rate 1.000e-05, It/sec 0.584, Tokens/sec 250.598, Trained Tokens 30889, Peak mem 11.278 GB
Iter 80: Train loss 0.662, Learning Rate 1.000e-05, It/sec 0.498, Tokens/sec 228.089, Trained Tokens 35471, Peak mem 11.278 GB
Iter 90: Train loss 0.747, Learning Rate 1.000e-05, It/sec 0.539, Tokens/sec 249.435, Trained Tokens 40103, Peak mem 11.278 GB
Iter 100: Train loss 0.705, Learning Rate 1.000e-05, It/sec 0.618, Tokens/sec 252.376, Trained Tokens 44185, Peak mem 11.278 GB
Iter 100: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000100_adapters.safetensors.
Iter 110: Train loss 0.686, Learning Rate 1.000e-05, It/sec 0.574, Tokens/sec 245.039, Trained Tokens 48455, Peak mem 11.278 GB
Iter 120: Train loss 0.614, Learning Rate 1.000e-05, It/sec 0.497, Tokens/sec 251.274, Trained Tokens 53513, Peak mem 11.278 GB
Iter 130: Train loss 0.714, Learning Rate 1.000e-05, It/sec 0.603, Tokens/sec 254.106, Trained Tokens 57729, Peak mem 11.278 GB
Iter 140: Train loss 0.664, Learning Rate 1.000e-05, It/sec 0.541, Tokens/sec 246.672, Trained Tokens 62291, Peak mem 11.278 GB
Iter 150: Train loss 0.621, Learning Rate 1.000e-05, It/sec 0.651, Tokens/sec 243.019, Trained Tokens 66025, Peak mem 11.278 GB
Iter 160: Train loss 0.677, Learning Rate 1.000e-05, It/sec 0.526, Tokens/sec 241.491, Trained Tokens 70615, Peak mem 11.278 GB
Iter 170: Train loss 0.679, Learning Rate 1.000e-05, It/sec 0.600, Tokens/sec 245.477, Trained Tokens 74705, Peak mem 11.278 GB
Iter 180: Train loss 0.642, Learning Rate 1.000e-05, It/sec 0.620, Tokens/sec 247.760, Trained Tokens 78701, Peak mem 11.278 GB
Iter 190: Train loss 0.583, Learning Rate 1.000e-05, It/sec 0.545, Tokens/sec 245.494, Trained Tokens 83206, Peak mem 12.106 GB
Iter 200: Val loss 0.656, Val took 23.333s
Iter 200: Train loss 0.582, Learning Rate 1.000e-05, It/sec 4.609, Tokens/sec 2289.965, Trained Tokens 88174, Peak mem 12.106 GB
Iter 200: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000200_adapters.safetensors.
Iter 210: Train loss 0.588, Learning Rate 1.000e-05, It/sec 0.456, Tokens/sec 227.196, Trained Tokens 93160, Peak mem 12.106 GB
Iter 220: Train loss 0.616, Learning Rate 1.000e-05, It/sec 0.445, Tokens/sec 224.953, Trained Tokens 98210, Peak mem 12.106 GB
Iter 230: Train loss 0.595, Learning Rate 1.000e-05, It/sec 0.492, Tokens/sec 226.822, Trained Tokens 102820, Peak mem 12.106 GB
Iter 240: Train loss 0.642, Learning Rate 1.000e-05, It/sec 0.510, Tokens/sec 241.390, Trained Tokens 107550, Peak mem 12.106 GB
Iter 250: Train loss 0.632, Learning Rate 1.000e-05, It/sec 0.577, Tokens/sec 250.235, Trained Tokens 111888, Peak mem 12.106 GB
Iter 260: Train loss 0.629, Learning Rate 1.000e-05, It/sec 0.597, Tokens/sec 244.398, Trained Tokens 115982, Peak mem 12.106 GB
Iter 270: Train loss 0.617, Learning Rate 1.000e-05, It/sec 0.485, Tokens/sec 242.927, Trained Tokens 120990, Peak mem 12.106 GB
Iter 280: Train loss 0.643, Learning Rate 1.000e-05, It/sec 0.604, Tokens/sec 250.170, Trained Tokens 125132, Peak mem 12.106 GB
Iter 290: Train loss 0.603, Learning Rate 1.000e-05, It/sec 0.476, Tokens/sec 247.535, Trained Tokens 130330, Peak mem 12.106 GB
Iter 300: Train loss 0.613, Learning Rate 1.000e-05, It/sec 0.654, Tokens/sec 251.402, Trained Tokens 134172, Peak mem 12.106 GB
Iter 300: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000300_adapters.safetensors.
Iter 310: Train loss 0.679, Learning Rate 1.000e-05, It/sec 0.637, Tokens/sec 246.751, Trained Tokens 138046, Peak mem 12.106 GB
Iter 320: Train loss 0.614, Learning Rate 1.000e-05, It/sec 0.657, Tokens/sec 248.500, Trained Tokens 141830, Peak mem 12.106 GB
Iter 330: Train loss 0.645, Learning Rate 1.000e-05, It/sec 0.662, Tokens/sec 250.279, Trained Tokens 145608, Peak mem 12.106 GB
Iter 340: Train loss 0.592, Learning Rate 1.000e-05, It/sec 0.548, Tokens/sec 246.601, Trained Tokens 150110, Peak mem 12.106 GB
Iter 350: Train loss 0.639, Learning Rate 1.000e-05, It/sec 0.615, Tokens/sec 245.428, Trained Tokens 154098, Peak mem 12.106 GB
Iter 360: Train loss 0.658, Learning Rate 1.000e-05, It/sec 0.622, Tokens/sec 248.240, Trained Tokens 158090, Peak mem 12.106 GB
Iter 370: Train loss 0.611, Learning Rate 1.000e-05, It/sec 0.545, Tokens/sec 241.383, Trained Tokens 162522, Peak mem 12.106 GB
Iter 380: Train loss 0.619, Learning Rate 1.000e-05, It/sec 0.546, Tokens/sec 249.920, Trained Tokens 167098, Peak mem 12.106 GB
Iter 390: Train loss 0.651, Learning Rate 1.000e-05, It/sec 0.548, Tokens/sec 241.141, Trained Tokens 171502, Peak mem 12.106 GB
Iter 400: Val loss 0.618, Val took 22.647s
Iter 400: Train loss 0.581, Learning Rate 1.000e-05, It/sec 5.096, Tokens/sec 2473.407, Trained Tokens 176356, Peak mem 12.106 GB
Iter 400: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000400_adapters.safetensors.
Iter 410: Train loss 0.589, Learning Rate 1.000e-05, It/sec 0.617, Tokens/sec 247.711, Trained Tokens 180372, Peak mem 12.106 GB
Iter 420: Train loss 0.559, Learning Rate 1.000e-05, It/sec 0.527, Tokens/sec 245.219, Trained Tokens 185022, Peak mem 12.106 GB
Iter 430: Train loss 0.630, Learning Rate 1.000e-05, It/sec 0.567, Tokens/sec 244.814, Trained Tokens 189338, Peak mem 12.106 GB
Iter 440: Train loss 0.596, Learning Rate 1.000e-05, It/sec 0.604, Tokens/sec 252.601, Trained Tokens 193520, Peak mem 12.106 GB
Iter 450: Train loss 0.613, Learning Rate 1.000e-05, It/sec 0.597, Tokens/sec 237.136, Trained Tokens 197492, Peak mem 12.106 GB
Iter 460: Train loss 0.564, Learning Rate 1.000e-05, It/sec 0.511, Tokens/sec 238.764, Trained Tokens 202167, Peak mem 12.106 GB
Iter 470: Train loss 0.648, Learning Rate 1.000e-05, It/sec 0.477, Tokens/sec 194.447, Trained Tokens 206242, Peak mem 12.106 GB
Iter 480: Train loss 0.601, Learning Rate 1.000e-05, It/sec 0.584, Tokens/sec 254.598, Trained Tokens 210604, Peak mem 12.106 GB
Iter 490: Train loss 0.626, Learning Rate 1.000e-05, It/sec 0.605, Tokens/sec 243.318, Trained Tokens 214624, Peak mem 12.106 GB
Iter 500: Train loss 0.632, Learning Rate 1.000e-05, It/sec 0.625, Tokens/sec 248.248, Trained Tokens 218596, Peak mem 12.106 GB
Iter 500: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000500_adapters.safetensors.
Iter 510: Train loss 0.600, Learning Rate 1.000e-05, It/sec 0.564, Tokens/sec 239.191, Trained Tokens 222840, Peak mem 12.106 GB
Iter 520: Train loss 0.618, Learning Rate 1.000e-05, It/sec 0.547, Tokens/sec 232.257, Trained Tokens 227084, Peak mem 12.106 GB
Iter 530: Train loss 0.610, Learning Rate 1.000e-05, It/sec 0.478, Tokens/sec 207.306, Trained Tokens 231422, Peak mem 12.106 GB
Iter 540: Train loss 0.622, Learning Rate 1.000e-05, It/sec 0.564, Tokens/sec 238.023, Trained Tokens 235640, Peak mem 12.106 GB
Iter 550: Train loss 0.626, Learning Rate 1.000e-05, It/sec 0.513, Tokens/sec 230.716, Trained Tokens 240140, Peak mem 12.106 GB
Iter 560: Train loss 0.587, Learning Rate 1.000e-05, It/sec 0.488, Tokens/sec 226.623, Trained Tokens 244782, Peak mem 12.106 GB
Iter 570: Train loss 0.607, Learning Rate 1.000e-05, It/sec 0.596, Tokens/sec 244.295, Trained Tokens 248884, Peak mem 12.106 GB
Iter 580: Train loss 0.626, Learning Rate 1.000e-05, It/sec 0.611, Tokens/sec 233.846, Trained Tokens 252714, Peak mem 12.106 GB
Iter 590: Train loss 0.622, Learning Rate 1.000e-05, It/sec 0.554, Tokens/sec 238.493, Trained Tokens 257022, Peak mem 12.106 GB
Iter 600: Val loss 0.646, Val took 20.027s
Iter 600: Train loss 0.607, Learning Rate 1.000e-05, It/sec 4.685, Tokens/sec 2164.955, Trained Tokens 261643, Peak mem 12.106 GB
Iter 600: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000600_adapters.safetensors.
Iter 610: Train loss 0.596, Learning Rate 1.000e-05, It/sec 0.533, Tokens/sec 238.976, Trained Tokens 266129, Peak mem 12.106 GB
Iter 620: Train loss 0.629, Learning Rate 1.000e-05, It/sec 0.534, Tokens/sec 243.171, Trained Tokens 270679, Peak mem 12.106 GB
Iter 630: Train loss 0.622, Learning Rate 1.000e-05, It/sec 0.522, Tokens/sec 254.309, Trained Tokens 275553, Peak mem 12.106 GB
Iter 640: Train loss 0.710, Learning Rate 1.000e-05, It/sec 0.663, Tokens/sec 241.131, Trained Tokens 279189, Peak mem 12.106 GB
Iter 650: Train loss 0.595, Learning Rate 1.000e-05, It/sec 0.506, Tokens/sec 237.818, Trained Tokens 283889, Peak mem 12.106 GB
Iter 660: Train loss 0.608, Learning Rate 1.000e-05, It/sec 0.622, Tokens/sec 251.857, Trained Tokens 287941, Peak mem 12.106 GB
Iter 670: Train loss 0.596, Learning Rate 1.000e-05, It/sec 0.525, Tokens/sec 250.159, Trained Tokens 292709, Peak mem 12.106 GB
Iter 680: Train loss 0.565, Learning Rate 1.000e-05, It/sec 0.451, Tokens/sec 250.664, Trained Tokens 298267, Peak mem 12.106 GB
Iter 690: Train loss 0.598, Learning Rate 1.000e-05, It/sec 0.623, Tokens/sec 248.441, Trained Tokens 302257, Peak mem 12.106 GB
Iter 700: Train loss 0.600, Learning Rate 1.000e-05, It/sec 0.443, Tokens/sec 239.808, Trained Tokens 307671, Peak mem 12.106 GB
Iter 700: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000700_adapters.safetensors.
Iter 710: Train loss 0.567, Learning Rate 1.000e-05, It/sec 0.526, Tokens/sec 238.065, Trained Tokens 312193, Peak mem 12.106 GB
Iter 720: Train loss 0.646, Learning Rate 1.000e-05, It/sec 0.599, Tokens/sec 253.881, Trained Tokens 316433, Peak mem 12.106 GB
Iter 730: Train loss 0.627, Learning Rate 1.000e-05, It/sec 0.671, Tokens/sec 248.509, Trained Tokens 320139, Peak mem 12.106 GB
Iter 740: Train loss 0.587, Learning Rate 1.000e-05, It/sec 0.614, Tokens/sec 249.034, Trained Tokens 324195, Peak mem 12.106 GB
Iter 750: Train loss 0.629, Learning Rate 1.000e-05, It/sec 0.591, Tokens/sec 242.646, Trained Tokens 328303, Peak mem 12.106 GB
Iter 760: Train loss 0.640, Learning Rate 1.000e-05, It/sec 0.526, Tokens/sec 245.124, Trained Tokens 332967, Peak mem 12.106 GB
Iter 770: Train loss 0.595, Learning Rate 1.000e-05, It/sec 0.644, Tokens/sec 245.625, Trained Tokens 336779, Peak mem 12.106 GB
Iter 780: Train loss 0.609, Learning Rate 1.000e-05, It/sec 0.581, Tokens/sec 238.123, Trained Tokens 340877, Peak mem 12.106 GB
Iter 790: Train loss 0.639, Learning Rate 1.000e-05, It/sec 0.617, Tokens/sec 243.045, Trained Tokens 344817, Peak mem 12.106 GB
Iter 800: Val loss 0.658, Val took 21.425s
Iter 800: Train loss 0.621, Learning Rate 1.000e-05, It/sec 4.594, Tokens/sec 2302.357, Trained Tokens 349829, Peak mem 12.106 GB
Iter 800: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000800_adapters.safetensors.
Iter 810: Train loss 0.559, Learning Rate 1.000e-05, It/sec 0.499, Tokens/sec 253.345, Trained Tokens 354909, Peak mem 12.106 GB
Iter 820: Train loss 0.625, Learning Rate 1.000e-05, It/sec 0.691, Tokens/sec 248.738, Trained Tokens 358509, Peak mem 12.106 GB
Iter 830: Train loss 0.610, Learning Rate 1.000e-05, It/sec 0.587, Tokens/sec 242.270, Trained Tokens 362635, Peak mem 12.106 GB
Iter 840: Train loss 0.603, Learning Rate 1.000e-05, It/sec 0.604, Tokens/sec 245.970, Trained Tokens 366704, Peak mem 12.106 GB
Iter 850: Train loss 0.566, Learning Rate 1.000e-05, It/sec 0.627, Tokens/sec 248.142, Trained Tokens 370660, Peak mem 12.106 GB
Iter 860: Train loss 0.595, Learning Rate 1.000e-05, It/sec 0.513, Tokens/sec 250.849, Trained Tokens 375546, Peak mem 12.106 GB
Iter 870: Train loss 0.611, Learning Rate 1.000e-05, It/sec 0.583, Tokens/sec 242.341, Trained Tokens 379706, Peak mem 12.106 GB
Iter 880: Train loss 0.570, Learning Rate 1.000e-05, It/sec 0.565, Tokens/sec 245.302, Trained Tokens 384050, Peak mem 12.106 GB
Iter 890: Train loss 0.614, Learning Rate 1.000e-05, It/sec 0.559, Tokens/sec 249.611, Trained Tokens 388518, Peak mem 12.106 GB
Iter 900: Train loss 0.592, Learning Rate 1.000e-05, It/sec 0.545, Tokens/sec 233.360, Trained Tokens 392796, Peak mem 12.106 GB
Iter 900: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0000900_adapters.safetensors.
Iter 910: Train loss 0.589, Learning Rate 1.000e-05, It/sec 0.575, Tokens/sec 235.257, Trained Tokens 396888, Peak mem 12.106 GB
Iter 920: Train loss 0.559, Learning Rate 1.000e-05, It/sec 0.461, Tokens/sec 250.336, Trained Tokens 402324, Peak mem 12.106 GB
Iter 930: Train loss 0.594, Learning Rate 1.000e-05, It/sec 0.622, Tokens/sec 251.058, Trained Tokens 406358, Peak mem 12.106 GB
Iter 940: Train loss 0.602, Learning Rate 1.000e-05, It/sec 0.567, Tokens/sec 248.398, Trained Tokens 410742, Peak mem 12.106 GB
Iter 950: Train loss 0.582, Learning Rate 1.000e-05, It/sec 0.553, Tokens/sec 253.048, Trained Tokens 415322, Peak mem 12.106 GB
Iter 960: Train loss 0.667, Learning Rate 1.000e-05, It/sec 0.563, Tokens/sec 238.937, Trained Tokens 419566, Peak mem 12.106 GB
Iter 970: Train loss 0.564, Learning Rate 1.000e-05, It/sec 0.524, Tokens/sec 245.174, Trained Tokens 424248, Peak mem 12.106 GB
Iter 980: Train loss 0.635, Learning Rate 1.000e-05, It/sec 0.455, Tokens/sec 193.946, Trained Tokens 428514, Peak mem 12.106 GB
Iter 990: Train loss 0.600, Learning Rate 1.000e-05, It/sec 0.601, Tokens/sec 253.396, Trained Tokens 432732, Peak mem 12.106 GB
Iter 1000: Val loss 0.632, Val took 22.836s
Iter 1000: Train loss 0.590, Learning Rate 1.000e-05, It/sec 3.968, Tokens/sec 1815.926, Trained Tokens 437308, Peak mem 12.106 GB
Iter 1000: Saved adapter weights to adapters_llama_3b/adapters.safetensors and adapters_llama_3b/0001000_adapters.safetensors.
Saved final weights to adapters_llama_3b/adapters.safetensors.
"""

In [None]:
# You have two choices: 1- Using the cell code, 2- Terminal code
"""
myenv/bin/python -m mlx_lm.lora \
  --model meta-llama/Llama-3.1-8B-Instruct \
  --data data \
  --train \
  --batch-size 1 \
  --num-layers 8 \
  --iters 600 \
  --fine-tune-type lora
"""
#Output:
"""

Loading pretrained model
Fetching 11 files: 100%|████████████████████| 11/11 [00:00<00:00, 118300.88it/s]
Loading datasets
Training
Trainable parameters: 0.042% (3.408M/8030.261M)
Starting training..., iters: 600
Iter 1: Val loss 2.041, Val took 1722.526s
Iter 10: Train loss 1.944, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.451, Trained Tokens 1916, Peak mem 18.338 GB
Iter 20: Train loss 1.203, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.662, Trained Tokens 4077, Peak mem 18.338 GB
Iter 30: Train loss 0.893, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.769, Trained Tokens 6232, Peak mem 18.545 GB
Iter 40: Train loss 0.679, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.675, Trained Tokens 8340, Peak mem 18.614 GB
Iter 50: Train loss 0.560, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.572, Trained Tokens 10466, Peak mem 18.614 GB
Iter 60: Train loss 0.690, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.537, Trained Tokens 12580, Peak mem 18.966 GB
Iter 70: Train loss 0.613, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.811, Trained Tokens 14698, Peak mem 18.966 GB
Iter 80: Train loss 0.544, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.827, Trained Tokens 16931, Peak mem 19.108 GB
Iter 90: Train loss 0.542, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.823, Trained Tokens 19161, Peak mem 19.469 GB
Iter 100: Train loss 0.572, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.688, Trained Tokens 21316, Peak mem 19.469 GB
Iter 100: Saved adapter weights to adapters/adapters.safetensors and adapters/0000100_adapters.safetensors.
Iter 110: Train loss 0.570, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.323, Trained Tokens 23136, Peak mem 19.469 GB
Iter 120: Train loss 0.569, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.672, Trained Tokens 25198, Peak mem 19.469 GB
Iter 130: Train loss 0.558, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.794, Trained Tokens 27446, Peak mem 19.469 GB
Iter 140: Train loss 0.593, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.597, Trained Tokens 29539, Peak mem 19.469 GB
Iter 150: Train loss 0.514, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.728, Trained Tokens 31703, Peak mem 19.469 GB
Iter 160: Train loss 0.571, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.725, Trained Tokens 33645, Peak mem 19.469 GB
Iter 170: Train loss 0.534, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.731, Trained Tokens 35593, Peak mem 19.469 GB
Iter 180: Train loss 0.547, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.921, Trained Tokens 37819, Peak mem 19.469 GB
Iter 190: Train loss 0.501, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.453, Trained Tokens 39666, Peak mem 19.469 GB
Iter 200: Val loss 0.538, Val took 1592.634s
Iter 200: Train loss 0.501, Learning Rate 1.000e-05, It/sec 0.116, Tokens/sec 28.023, Trained Tokens 42088, Peak mem 19.469 GB
Iter 200: Saved adapter weights to adapters/adapters.safetensors and adapters/0000200_adapters.safetensors.
Iter 210: Train loss 0.515, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.710, Trained Tokens 44153, Peak mem 19.469 GB
Iter 220: Train loss 0.530, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.055, Trained Tokens 46397, Peak mem 19.469 GB
Iter 230: Train loss 0.571, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.109, Trained Tokens 48594, Peak mem 20.605 GB
Iter 240: Train loss 0.531, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.703, Trained Tokens 50617, Peak mem 20.605 GB
Iter 250: Train loss 0.469, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.777, Trained Tokens 52803, Peak mem 20.605 GB
Iter 260: Train loss 0.442, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.347, Trained Tokens 55329, Peak mem 20.605 GB
Iter 270: Train loss 0.490, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.150, Trained Tokens 57556, Peak mem 20.605 GB
Iter 280: Train loss 0.483, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.266, Trained Tokens 60085, Peak mem 20.914 GB
Iter 290: Train loss 0.527, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.849, Trained Tokens 62247, Peak mem 20.914 GB
Iter 300: Train loss 0.492, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.589, Trained Tokens 64205, Peak mem 20.914 GB
Iter 300: Saved adapter weights to adapters/adapters.safetensors and adapters/0000300_adapters.safetensors.
Iter 310: Train loss 0.513, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.902, Trained Tokens 66314, Peak mem 20.914 GB
Iter 320: Train loss 0.505, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.007, Trained Tokens 68574, Peak mem 20.914 GB
Iter 330: Train loss 0.521, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.760, Trained Tokens 70660, Peak mem 20.914 GB
Iter 340: Train loss 0.541, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.534, Trained Tokens 72612, Peak mem 20.914 GB
Iter 350: Train loss 0.519, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.869, Trained Tokens 74696, Peak mem 20.914 GB
Iter 360: Train loss 0.504, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.757, Trained Tokens 76714, Peak mem 20.914 GB
Iter 370: Train loss 0.487, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.765, Trained Tokens 78802, Peak mem 20.914 GB
Iter 380: Train loss 0.542, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.315, Trained Tokens 80586, Peak mem 20.914 GB
Iter 390: Train loss 0.521, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.463, Trained Tokens 83198, Peak mem 20.914 GB
Iter 400: Val loss 0.514, Val took 1662.526s
Iter 400: Train loss 0.477, Learning Rate 1.000e-05, It/sec 0.115, Tokens/sec 26.611, Trained Tokens 85519, Peak mem 20.914 GB
Iter 400: Saved adapter weights to adapters/adapters.safetensors and adapters/0000400_adapters.safetensors.
Iter 410: Train loss 0.474, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.279, Trained Tokens 87424, Peak mem 20.914 GB
Iter 420: Train loss 0.505, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.011, Trained Tokens 89647, Peak mem 20.914 GB
Iter 430: Train loss 0.504, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.087, Trained Tokens 92035, Peak mem 20.914 GB
Iter 440: Train loss 0.495, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.046, Trained Tokens 94466, Peak mem 20.914 GB
Iter 450: Train loss 0.503, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.672, Trained Tokens 96499, Peak mem 20.914 GB
Iter 460: Train loss 0.515, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.886, Trained Tokens 98635, Peak mem 20.914 GB
Iter 470: Train loss 0.504, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.064, Trained Tokens 100886, Peak mem 20.914 GB
Iter 480: Train loss 0.468, Learning Rate 1.000e-05, It/sec 0.012, Tokens/sec 2.955, Trained Tokens 103305, Peak mem 20.914 GB
Iter 490: Train loss 0.515, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.808, Trained Tokens 105436, Peak mem 20.914 GB
Iter 500: Train loss 0.495, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.799, Trained Tokens 107603, Peak mem 20.914 GB
Iter 500: Saved adapter weights to adapters/adapters.safetensors and adapters/0000500_adapters.safetensors.
Iter 510: Train loss 0.584, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.285, Trained Tokens 110072, Peak mem 20.914 GB
Iter 520: Train loss 0.505, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.832, Trained Tokens 112253, Peak mem 20.914 GB
Iter 530: Train loss 0.505, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.327, Trained Tokens 114805, Peak mem 20.914 GB
Iter 540: Train loss 0.464, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.267, Trained Tokens 117348, Peak mem 20.914 GB
Iter 550: Train loss 0.513, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.112, Trained Tokens 119569, Peak mem 20.914 GB
Iter 560: Train loss 0.438, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 3.056, Trained Tokens 121756, Peak mem 20.914 GB
Iter 570: Train loss 0.514, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 2.713, Trained Tokens 123820, Peak mem 20.914 GB
Iter 580: Train loss 0.525, Learning Rate 1.000e-05, It/sec 0.013, Tokens/sec 3.026, Trained Tokens 126135, Peak mem 20.914 GB
Iter 590: Train loss 0.520, Learning Rate 1.000e-05, It/sec 0.014, Tokens/sec 2.965, Trained Tokens 128242, Peak mem 20.914 GB
Iter 600: Val loss 0.507, Val took 1597.140s
Iter 600: Train loss 0.532, Learning Rate 1.000e-05, It/sec 0.089, Tokens/sec 16.756, Trained Tokens 130122, Peak mem 20.914 GB
Iter 600: Saved adapter weights to adapters/adapters.safetensors and adapters/0000600_adapters.safetensors.
Saved final weights to adapters/adapters.safetensors.
"""

### Testing the Outputs

In [None]:
# Evaluate the trained adapters on the test split through the LoRA CLI entry point.
# lora.main() takes its options from the command line, so the arguments are
# injected through sys.argv for the duration of the call only.
test_argv = [
    "lora.py",
    "--model", str(args.model),
    "--data", str(args.data),
    "--test",
    "--adapter-path", str(args.adapter_path),
]

saved_argv = sys.argv
sys.argv = test_argv
try:
    lora.main()
finally:
    # Always hand the kernel its original argv back, even if the evaluation fails,
    # so later cells do not inherit this cell's fake command line.
    sys.argv = saved_argv

Loading pretrained model
Loading datasets
Testing
Test loss 0.620, Test ppl 1.859.


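The test perplexity is 1.859 (perplexity = exp(test loss) = exp(0.620)). This means that, on average, the model is about as uncertain as if it were choosing among roughly 1.86 equally likely tokens for each next token. This is a good result for a language model (usually PPL < 10 is considered good, and PPL ≈ 2 is close to perfect).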


### Pre-Trained vs. Fine-Tuned LLM Output Comparison

In [None]:
# Base model: ask the reference question so its answer can be compared
# with the fine-tuned model's answer in the next cell.

base_system_message = {
    "role": "system",
    "content": "Cutting Knowledge Date: December 2023\nToday Date: 14 Feb 2025\nYou are the support assistant for questions that are asked to you.",
}
base_user_message = {
    "role": "user",
    "content": "List all transactions and customers from the 'Africa' region.",
}
messages_base = [base_system_message, base_user_message]

# custom_chat_template is defined elsewhere in the notebook; presumably it renders
# the message list into a single prompt string — confirm against its definition.
prompt_base = custom_chat_template(messages_base)

# verbose=True prints the generated text together with the token/memory statistics.
response = mlx_lm.generate(
    model,
    tokenizer,
    prompt=prompt_base,
    verbose=True,
)



I don't have any information about transactions or customers from the 'Africa' region. I'm a text-based AI assistant and do not have access to any specific data or databases. If you could provide more context or clarify what you are referring to, I'll do my best to help.
Prompt: 61 tokens, 71.435 tokens-per-sec
Generation: 60 tokens, 68.330 tokens-per-sec
Peak memory: 1.875 GB


In [None]:
# Fine-tuned model: ask the same question as in the base-model cell, this time
# with the adapters from ./adapters_llama_3b/ applied via the generate CLI.

# NOTE(review): this relies on `generate` being bound to the mlx_lm.generate
# *module* (it must expose main()); confirm this holds for the installed mlx_lm version.
import mlx_lm.generate as generate
import argparse
import sys

messages_ft = [
    {"role": "system", "content": "Cutting Knowledge Date: December 2023\nToday Date: 14 Feb 2025\nYou are the support assistant for questions that are asked to you."},
    {"role": "user", "content": "List all transactions and customers from the 'Africa' region."}
]

prompt_ft = custom_chat_template(messages_ft)

args = argparse.Namespace(
    model=MODEL_ID,
    adapter_path = "./adapters_llama_3b/",
    prompt = prompt_ft,
    max_tokens = 500,
    verbose = True,
)

# generate.main() takes its options from the command line, so the arguments are
# injected through sys.argv for the duration of the call only.
generate_argv = [
    "generate.py",
    "--model", str(args.model),
    # The prompt was already formatted by custom_chat_template above.
    "--ignore-chat-template",
    "--adapter-path", str(args.adapter_path),
    "--prompt",str(args.prompt),
    "--max-tokens",str(args.max_tokens),
    "--verbose",str(args.verbose)
]

saved_argv = sys.argv
sys.argv = generate_argv
try:
    generate.main()
finally:
    # Always hand the kernel its original argv back, even if generation fails,
    # so later cells do not inherit this cell's fake command line.
    sys.argv = saved_argv



SQL: SELECT transactions.id, customers.name, transactions.amount, customers.country FROM transactions JOIN customers ON transactions.customer_id = customers.id WHERE customers.region = 'Africa';
Prompt: 62 tokens, 160.189 tokens-per-sec
Generation: 34 tokens, 51.437 tokens-per-sec
Peak memory: 3.686 GB


### Fuse the Adapters into the Model to Build a New LLM

In [55]:
# Bring mlx_lm's fuse module into scope; a later cell calls fuse.main().
from mlx_lm import fuse

# Exploratory: print the module's help text to see which entry points it exposes.
help(fuse)

Help on module mlx_lm.fuse in mlx_lm:

NAME
    mlx_lm.fuse

FUNCTIONS
    main() -> None

    parse_arguments() -> argparse.Namespace

FILE
    /Users/arda/Documents/llama-finetune/myenv/lib/python3.12/site-packages/mlx_lm/fuse.py




In [58]:
# Fuse the trained adapters into the base model and export the result as GGUF.
# Only model, adapter_path and save_path are passed to fuse below; the other
# fields are carried over from the generation cell and are not used here.
args = argparse.Namespace(
    model=MODEL_ID,
    adapter_path = "./adapters_llama_3b/",
    prompt = prompt_ft,
    max_tokens = 500,
    verbose = True,
    save_path = "./sql_agent/",
    gguf_path = "./sql_agent/gguf_model/ggml-model-sql-agent.gguf"
)

# fuse.main() takes its options from the command line, so the arguments are
# injected through sys.argv for the duration of the call only.
fuse_argv = [
    "fuse.py",
    "--model", str(args.model),
    "--adapter-path", str(args.adapter_path),
    # De-quantize first: the base model is a 4-bit checkpoint, and quantized
    # weights are not supported by the GGUF export.
    "--de-quantize",
    "--save-path", str(args.save_path),
    "--export-gguf",
    # Optional custom GGUF file name (disabled, so the default file name is used).
    #"--gguf-path", str(args.gguf_path)
]

saved_argv = sys.argv
sys.argv = fuse_argv
try:
    fuse.main()
finally:
    # Always hand the kernel its original argv back, even if fusing fails,
    # so later cells do not inherit this cell's fake command line.
    sys.argv = saved_argv

Loading pretrained model


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

De-quantizing model
Converting to GGUF format
Converted GGUF model saved as: sql_agent/ggml-model-f16.gguf


## Build Model with OLLAMA

In [10]:
import ollama

In [None]:
# Exploratory: print the help text of the ollama Python package.
help(ollama)

In [None]:
# Start the Ollama API server in the background and keep it running.
# A foreground `!ollama serve` never returns, so it would block this cell
# (and every cell after it) until the kernel is interrupted.
import subprocess

ollama_server = subprocess.Popen(
    ["ollama", "serve"],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
)
# NOTE(review): if an Ollama server is already running, this process is expected
# to exit right away because the port is taken — confirm on your setup.
# Stop the server started here with: ollama_server.terminate()

In [None]:
# Create the models/ directory for the Ollama Modelfile (-p: no error if it already exists).
!mkdir -p models

In [None]:
# Create the Ollama Modelfile that points at the fused GGUF weights.
# `nano` is an interactive editor and cannot be driven from a notebook cell (the
# extra words would just be opened as additional files), so the file is written directly.
from pathlib import Path

modelfile_path = Path("models/Modelfile")
modelfile_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): the relative FROM path is presumably resolved relative to the
# Modelfile's own directory (models/) — confirm against the Ollama Modelfile docs.
with modelfile_path.open("w", encoding="utf-8") as modelfile:
    modelfile.write("FROM ../sql_agent/ggml-model-f16.gguf\n")

In [23]:
# Create the Ollama model named "sql-agent" from models/Modelfile.
!ollama create sql-agent -f models/Modelfile

[?25lgathering model components ⠋ [?25h[?25l[2K[1Ggathering model components ⠙ [?25h[?25l[2K[1Ggathering model components ⠹ [?25h[?25l[2K[1Ggathering model components ⠸ [?25h[?25l[2K[1Ggathering model components ⠼ [?25h[?25l[2K[1Ggathering model components ⠴ [?25h[?25l[2K[1Ggathering model components ⠦ [?25h[?25l[2K[1Ggathering model components ⠧ [?25h[?25l[2K[1Ggathering model components ⠇ [?25h[?25l[2K[1Ggathering model components ⠏ [?25h[?25l[2K[1Ggathering model components ⠋ [?25h[?25l[2K[1Ggathering model components ⠙ [?25h[?25l[2K[1Ggathering model components ⠹ [?25h[?25l[2K[1Ggathering model components ⠸ [?25h[?25l[2K[1Ggathering model components ⠼ [?25h[?25l[2K[1Ggathering model components ⠴ [?25h[?25l[2K[1Ggathering model components ⠦ [?25h[?25l[2K[1Ggathering model components ⠧ [?25h[?25l[2K[1Ggathering model components ⠇ [?25h[?25l[2K[1Ggathering model components ⠏ [?25h[?25l[2K[1Ggathering mode

In [26]:
# List the local Ollama models to confirm that sql-agent was created.
!ollama list

NAME                ID              SIZE      MODIFIED      
sql-agent:latest    c6b495001637    6.4 GB    7 minutes ago    


In [None]:
!ollama run sql-agent