<a href="https://colab.research.google.com/github/dastanrab/Data-Structures/blob/master/calori_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install unsloth
!pip install bitsandbytes
!pip install trl
!pip install accelerate
!pip install datasets
!pip install transformers
!pip install protobuf==3.20.3
!git clone https://github.com/ggml-org/llama.cpp
%cd llama.cpp
# !cmake -B build
# !cmake --build build --config Release
git checkout b3345
git submodule update --init --recursive
make clean
make all -j
git log -

from unsloth import FastLanguageModel
from datasets import load_dataset

# Load base model with Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = 'unsloth/Phi-3-mini-4k-instruct-bnb-4bit',
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True
)

# Load dataset directly from Hugging Face
dataset = load_dataset("Codatta/MM-Food-100K", split="train")

In [None]:
cd llama.cpp
git checkout b3345
git submodule update --init --recursive
make clean
make all -j
git log -1

In [None]:
import json
# Map dataset to text format for SFTTrainer
def _join_field(value):
    """Return a list-like column as a comma-separated string.

    Accepts a real sequence or a string. A string that is a JSON-encoded
    list is decoded first; any other string is returned unchanged, since
    joining a string directly would insert ", " between its characters.
    None becomes an empty string.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        try:
            decoded = json.loads(value)
        except ValueError:
            return value
        if not isinstance(decoded, (list, tuple)):
            return value
        value = decoded
    return ', '.join(str(item) for item in value)


def _profile_json(profile):
    """Serialize the nutritional profile as a JSON string.

    A profile that is already a JSON string is parsed and re-serialized so it
    is not double-encoded; a string that is not valid JSON is used verbatim.
    """
    if isinstance(profile, str):
        try:
            profile = json.loads(profile)
        except ValueError:
            return profile
    return json.dumps(profile, ensure_ascii=False)


def to_text(ex):
    """Convert one dataset row into a chat-formatted training example.

    Args:
        ex: mapping with the keys 'dish_name', 'ingredients', 'portion_size',
            'cooking_method' and 'nutritional_profile'. 'ingredients' and
            'portion_size' may be sequences or JSON-encoded strings;
            'nutritional_profile' may be a dict or a JSON string.
            # NOTE(review): the on-disk column types of the dataset are not
            # visible here - both forms are handled; confirm against the data.

    Returns:
        dict with a single key "text": the user prompt and assistant response
        rendered through the global tokenizer's chat template.
    """
    # Input (prompt) is built from the dataset columns.
    prompt = (
        f"Dish: {ex['dish_name']}\n"
        f"Ingredients: {_join_field(ex['ingredients'])}\n"
        f"Portion: {_join_field(ex['portion_size'])}\n"
        f"Cooking method: {ex['cooking_method']}"
    )

    # Output (response) is the nutritional profile as JSON.
    response = _profile_json(ex["nutritional_profile"])

    msgs = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": response},
    ]
    return {
        "text": tokenizer.apply_chat_template(
            msgs, tokenize=False, add_generation_prompt=False
        )
    }

# Apply to_text to every row and drop all pre-existing columns, so the mapped
# dataset carries only the "text" field returned by to_text.
source_columns = dataset.column_names
dataset = dataset.map(to_text, remove_columns=source_columns)

In [None]:
# Wrap the base model with LoRA adapters for fine-tuning.
# The adapter settings are collected first so they can be read at a glance.
lora_target_modules = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'up_proj', 'down_proj',
]
lora_settings = dict(
    r=64,
    target_modules=lora_target_modules,
    lora_alpha=128,
    lora_dropout=0,
    bias='none',
    use_gradient_checkpointing='unsloth',
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
from trl import SFTTrainer, SFTConfig

# Optimisation settings, kept separate from the trainer wiring below.
# Effective batch per device = 2 * 4 accumulation steps.
# max_steps is deliberately small for a demo run (increase for real
# training); per the TrainingArguments docs a positive max_steps takes
# precedence over num_train_epochs.
training_args = SFTConfig(
    output_dir="outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    max_steps=60,
    num_train_epochs=1,
    logging_steps=1,
    optim="adamw_8bit",
)

# Supervised fine-tuning on the "text" column of the prepared dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field='text',
    max_seq_length=2048,
    args=training_args,
)

trainer.train()

In [None]:
# Smoke-test the fine-tuned model on one hand-written dish description.
FastLanguageModel.for_inference(model)

messages = [{
    "role": "user",
    "content": "Dish: Fried Chicken\nIngredients: chicken, breading, oil\nPortion: 300g\nCooking method: Frying",
}]

# Tokenize through the chat template, ending with the assistant turn marker
# (add_generation_prompt=True), then move the ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)
inputs = inputs.to("cuda")

# Sampling configuration for generation.
sampling_options = dict(
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
outputs = model.generate(input_ids=inputs, **sampling_options)

# Decode the first (only) sequence of the batch and print it.
decoded_batch = tokenizer.batch_decode(outputs, skip_special_tokens=True)
response = decoded_batch[0]
print(response)

In [None]:
# Export the fine-tuned model to GGUF (q4_k_m quantization) for use with Ollama.
gguf_output_dir = "/content/gguf_food_model"

model.save_pretrained_gguf(
    gguf_output_dir,
    tokenizer,
    quantization_method="q4_k_m",
    maximum_memory_usage=0.3,
)

In [None]:

# !python3 convert_hf_to_gguf.py ../gguf_food_model --outfile ../gguf_food_model_final.gguf
# %%bash
# git clone https://github.com/ggerganov/llama.cpp
# cd llama.cpp && make clean && LLAMA_CUDA=1 make all -j
!./llama.cpp/quantize /content/gguf_food_model_final.gguf /content/gguf_food_model q8_0