In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer, SFTConfig

2025-06-02 17:42:51.945073: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748886171.995337     797 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748886172.010996     797 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-02 17:42:52.063779: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# The model that you want to train from the Hugging Face hub
model_name = "EleutherAI/gpt-neo-1.3B" #"HuggingFaceTB/SmolLM2-360M" #"meta-llama/Llama-3.2-3B" #"Qwen/Qwen3-1.7B"

# dataset_name = "mlabonne/guanaco-llama2-1k"

# Fine-tuned model name
new_model = "gpt-neo-1.3B-lora" #"SmolLM2-360M-3epochs-lora" #"Qwen3-1.7B-chat-finetune"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

In [3]:
# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [4]:
# Output directory where the model predictions and checkpoints will be stored
output_dir = "results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 2

# Batch size per GPU for evaluation
per_device_eval_batch_size = 2

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

In [5]:
# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [6]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)


In [7]:
# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

In [8]:
# Load dataset (you can process it here)
dataset = load_dataset("dongx1997/NutriBench", "v2", split="train")

# Preprocessing: format as "User: ... Assistant: ..."
def format_example(example):
    prompt = f"User: {example['meal_description']}\nAssistant: The meal contains {example['carb']}g carbs, {example['protein']}g protein, {example['energy']} calories and {example['fat']}g fat."
    tokenized = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

dataset = dataset.map(format_example, remove_columns=dataset.column_names)

# Data collator
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
)

# Set training parameters
training_arguments = SFTConfig(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard",
    packing=packing,
    max_seq_length=max_seq_length,
    dataset_text_field="text",
    label_names=["labels"],
)

In [9]:
# model = model.unload()
if isinstance(model, PeftModel):
    model = model.unload()
    print('done')

In [10]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    processing_class=tokenizer,
    data_collator=collator,
    args=training_arguments,
)



In [11]:
# Train model
trainer.train()

Step,Training Loss
25,3.4996
50,3.4304
75,3.0555
100,2.6396
125,2.1803
150,1.9753
175,1.878
200,1.829
225,1.8146
250,1.7526




TrainOutput(global_step=7809, training_loss=1.510645264195547, metrics={'train_runtime': 8957.458, 'train_samples_per_second': 1.743, 'train_steps_per_second': 0.872, 'total_flos': 5.904225356867174e+16, 'train_loss': 1.510645264195547})

In [12]:
# model.save_pretrained("smolLM2-lora-adapter")
# tokenizer.save_pretrained("smolLM2-lora")
trainer.model.save_pretrained(new_model)

# from transformers import AutoModelForCausalLM, AutoTokenizer
# from peft import PeftModel

# base_model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     load_in_4bit=True,
#     device_map="auto"
# )

# model = PeftModel.from_pretrained(base_model, "smolLM2-lora-adapter")
# tokenizer = AutoTokenizer.from_pretrained(model_name)



In [8]:
import torch

def generate_text(prompt, input_model, max_new_tokens=200, temperature=0.7, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt").to(input_model.device)
    with torch.no_grad():
        outputs = input_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [14]:
user_profile = "Age: 52, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 6 years, Comorbidities: Obesity, Blood Pressure Readings: Average 145/89 mm Hg, Symptoms: Mild headaches, dizziness, BMI: 32.0 kg/m² (Obese category), Relevant Laboratory Tests: Electrocardiogram (ECG): Normal, Lipid Profile: LDL-C 128 mg/dL, HDL-C 42 mg/dL, Triglycerides 160 mg/dL, Kidney Function: Normal, Blood Glucose: Normal"
prompt = f"You are an expert nutritionist agent. Generate a list of daily nutrient targets for a given user with the profile: {user_profile}"

result = generate_text(prompt)
print(result)

You are an expert nutritionist agent. Generate a list of daily nutrient targets for a given user with the profile: Age: 52, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 6 years, Comorbidities: Obesity, Blood Pressure Readings: Average 145/89 mm Hg, Symptoms: Mild headaches, dizziness, BMI: 32.0 kg/m² (Obese category), Relevant Laboratory Tests: Electrocardiogram (ECG): Normal, Lipid Profile: LDL-C 128 mg/dL, HDL-C 42 mg/dL, Triglycerides 160 mg/dL, Kidney Function: Normal, Blood Glucose: Normal, Urine Protein: 0.8g/d, Urine Protein: 2.4g/d, Urine Protein: 0.4g/d, Urine Protein: 0.3g/d, Urine Protein: 0.5g/d, Urine Protein: 0.1g/d, Urine Protein: 0.5g/d, Urine Protein: 0.3g/d, Urine Protein: 0.3g/d, Urine Protein: 0.6g/d, Urine Protein: 0.3g/d, Urine Protein: 0.3g/d, Urine Protein: 0.1g/d, Urine Protein: 0.1g/d, Urine Protein: 0.4g/d, Urine Protein: 0.3g/d, Urine Protein: 0.3g/d, Urine Protein: 0.3g/d, U


In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_name = "EleutherAI/gpt-neo-1.3B"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto"
)

model = PeftModel.from_pretrained(base_model, "gpt-neo-1.3B-lora")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# user_profile = "Age: 52, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 6 years, Comorbidities: Obesity, Blood Pressure Readings: Average 145/89 mm Hg, Symptoms: Mild headaches, dizziness, BMI: 32.0 kg/m² (Obese category), Relevant Laboratory Tests: Electrocardiogram (ECG): Normal, Lipid Profile: LDL-C 128 mg/dL, HDL-C 42 mg/dL, Triglycerides 160 mg/dL, Kidney Function: Normal, Blood Glucose: Normal"
# prompt = f"You are an expert nutritionist agent. Generate a list of daily nutrient targets for a given user with the profile: {user_profile}"

prompt = "Generate 1-day meal plan(breakfast, lunch, dinner) for below patient with hypertension to regulate his medical condition without any drugs/medicines. Below is user profile User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal"

result = generate_text(prompt, base_model)
print(result)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Generate 1-day meal plan(breakfast, lunch, dinner) for below patient with hypertension to regulate his medical condition without any drugs/medicines. Below is user profile User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal (5.3), Vitamins: No vitamin deficiency, Minerals: Normal potassium, Calcium: 1.4, Magnesium: 0.9, Phosphorus: 0.1, Sulfur: 0.8, Protein: 9.3, Fiber: 0.6, Protein Efficiency Ratio: 1.7, Protein Carbohydrate: 28.8, Protein Fiber: 4.8, Protein: 18.4g, Protein: 28.8g, Protein: 29.2g, Protein: 19.5g, Protein: 28.8g, Protein: 29.2g, Protein: 29.2g, Pr

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_name = "HuggingFaceTB/SmolLM2-360M"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto"
)

model = PeftModel.from_pretrained(base_model, "SmolLM2-360M-3epochs-lora")
tokenizer = AutoTokenizer.from_pretrained(model_name)

prompt = "Generate 1-day meal plan(breakfast, lunch, dinner) for below patient with hypertension to regulate his medical condition without any drugs/medicines. Below is user profile User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.)"

result = generate_text(prompt)
print(result)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Generate 1-day meal plan(breakfast, lunch, dinner) for below patient with hypertension to regulate his medical condition without any drugs/medicines. Below is user profile User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood type: O-, Diet: 100g of boiled white rice, 120g of boiled white rice, 200g of water, 100g of cooked white rice, 50g of sugar, 100g of cooked white rice, 100g of fried rice, 50g of fried rice, 100g of cooked white rice, 100g of fried rice, 100g of fried rice, 100g of cooked white rice, 100g of fried rice, 100g of cooked white rice, 100g of fried rice, 100g of cooked white rice, 100g of fried rice, 100g of cooked white rice, 100g of fried rice, 100g of cooked white rice, 100g


In [12]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch

def generate_nutrient_targets(user_profile: str, 
                               base_model_name: str = "EleutherAI/gpt-neo-1.3B", 
                               lora_weights_path: str = "./qlora_nutribench", use_base_model=False):

    # Quantization config for QLoRA models
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    )
    device_map = {"": 0}

    # Load tokenizer and base model
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    tokenizer.pad_token = tokenizer.eos_token  # Required for GPT-Neo sometimes

    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map=device_map
    )

    if use_base_model:
        model = base_model

    # Load LoRA weights
    model = PeftModel.from_pretrained(base_model, lora_weights_path)
    model.eval()

    # Prompt
    prompt = f"You are an expert nutritionist agent. Generate 1-day meal plan for a given user with the profile: {user_profile}"

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)

    # Generate
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=400,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    completion = response[len(prompt):].strip()
    return completion


In [13]:
user_profile = "User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests: Lipid Profile: Mildly elevated cholesterol, Blood Glucose: Normal"
result = generate_nutrient_targets(user_profile, "HuggingFaceTB/SmolLM2-360M", "SmolLM2-360M-finetune-lora", use_base_model=False)
print(result)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


, HDL: 1.26g/dL (Normal category), Urine: No evidence of a UTI, Vitamins and Minerals: 16.8g of dried beans, 100g of maize flour, 50g of coconut oil, 4g of salt, 30g of sugar, 5g of tea leaves, 1g of tea leaves, 0.0g of tea leaves, 0.5g of tea leaves, 50g of vegetable fats and oils, 5g of vegetable fats and oils, 10g of vegetable fats and oils, 1g of tea leaves, 0.3g of tea leaves, 0.1g of tea leaves, 0.2g of tea leaves, 0.2g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves, 0.0g of tea leaves,


In [8]:
user_profile = "User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal"
result = generate_nutrient_targets(user_profile, "EleutherAI/gpt-neo-1.3B", "gpt-neo-1.3B-lora", use_base_model=True)
print(result)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


, Cholesterol: 130 mg/200g (cholesterol), Glucose: 90.0g (200.0g), Protein: 45.0g (200.0g), Protein: 9.0g (200.0g), Protein: 0.3g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein: 0.0g (200.0g), Protein


In [4]:
user_profile = "User profile: Age: 31, Diagnosis: Essential Hypertension (Stage 1), Duration of Hypertension: 5 years, Comorbidities: None (or list: diabetes, obesity, etc.), Blood Pressure Readings: Average 162/88 mm Hg (recent readings), Symptoms: Occasional headaches, mild shortness of breath; no chest pain, BMI: 31 kg/m² (Obese category), Relevant Laboratory Tests:, Electrocardiogram (ECG): Normal, Lipid Profile: Mildly elevated cholesterol, Kidney Function: Normal, Blood Glucose: Normal"
result = generate_nutrient_targets(user_profile, "EleutherAI/gpt-neo-1.3B", "gpt-neo-1.3B-lora")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [5]:
print(result)

, Vitamins and Minerals: 1g vitamin C, 0g vitamin B12, 0g vitamin B6, 0g vitamin B2, 0g vitamin B3, 0g vitamin B5, 0g vitamin B1, 1g vitamin A, 0g vitamin D3, 0g vitamin E, 0g vitamin K, 0g iron, 0g potassium, 0g sodium, 0g sucrose, 0g zinc, 0g calcium, 0g protein.

N-3 and N-6 fatty acids are included in the nutritional needs for this diet, along with 2g of protein and 0.2g of fat. The nutritional needs for this diet are for the individual.

Nutritional and Nutritional Requirements

2g protein

0.2g fat

0g carbs

0g fat

0g protein

0.0g sugar

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.0g protein

0.0g fat

0.
