---
## Setup
---

### **Install Dependencies**

In [None]:
!pip install -qU transformers==4.48.3 datasets==3.2.0 optimum==1.24.0 
!pip install wandb huggingface_hub[hf_xet]
!pip install -qU json-repair==0.29.1

### **Install LLaMA-Factory**

Install **LLaMA-Factory** from the cloned repository in **development (editable) mode** — extra features such as GPU support and 8-bit training can be added through pip extras — so we can:
* Edit the code locally and use it without reinstalling.
* Start fine-tuning immediately
* Customize the training pipeline
> I followed the [installation documentation](https://llamafactory.readthedocs.io/en/latest/getting_started/installation.html).



In [2]:
# Start from a clean checkout every time this cell runs: remove any previous clone first.
%cd /kaggle/working/
!rm -rf LLaMA-Factory
# Shallow clone (--depth 1): only the most recent commit is downloaded.
!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
%cd LLaMA-Factory
# List the repository contents.
%ls

/kaggle/working
Cloning into 'LLaMA-Factory'...
remote: Enumerating objects: 357, done.[K
remote: Counting objects: 100% (357/357), done.[K
remote: Compressing objects: 100% (276/276), done.[K
remote: Total 357 (delta 76), reused 304 (delta 66), pack-reused 0 (from 0)[K
Receiving objects: 100% (357/357), 9.64 MiB | 31.06 MiB/s, done.
Resolving deltas: 100% (76/76), done.
/kaggle/working/LLaMA-Factory
[0m[01;34massets[0m/       [01;34mevaluation[0m/  MANIFEST.in     requirements.txt  [01;34mtests[0m/
CITATION.cff  [01;34mexamples[0m/    pyproject.toml  [01;34mscripts[0m/
[01;34mdata[0m/         LICENSE      README.md       setup.py
[01;34mdocker[0m/       Makefile     README_zh.md    [01;34msrc[0m/


In [122]:
%cd /kaggle/working/LLaMA-Factory/
# Editable install (-e) of the cloned checkout, run from the repository root.
!pip install -e . # Install LlaMaFactory in order to use LLaMA-Factory CLI

/kaggle/working/LLaMA-Factory
Obtaining file:///kaggle/working/LLaMA-Factory
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: llamafactory
  Building editable for llamafactory (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llamafactory: filename=llamafactory-0.9.3.dev0-0.editable-py3-none-any.whl size=26940 sha256=fa9134c33b7ed01efc7f993da3b01ed82cdbcb27ae5ac32445023cd99e62813b
  Stored in directory: /tmp/pip-ephem-wheel-cache-aq46ugn9/wheels/96/d8/b2/8fc665ed70525080a50f3ff8538833c6f74cd48eb82195d0f8
Successfully built llamafactory
Installing collected packages: llamafactory
  Attempting uninstall: llamafactory
    Found existing installation: llamafactory 0.9.3.dev0
    Uninstalling llamafactory-0.9.3.dev0:
      Successfully unin

In [None]:
!GRADIO_SHARE=1
!llamafactory-cli version # webui

2025-05-08 13:38:16.502271: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746711496.720150   39276 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746711496.780438   39276 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-08 13:38:17.240307: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
----------------------------------------------------------
| Welcome to LLaMA Factory, version 0.9.3.dev0           |

In [84]:
# import llamafactory

### **Import Dependencies**

In [87]:
import os
import json
import json_repair
from tqdm import tqdm
from typing import List
from pydantic import BaseModel, Field

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from IPython.display import JSON

### **Logins**

In [88]:
# if using kaggle 
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HUGGINGFACEHUB_API_TOKEN")
wandb_token = user_secrets.get_secret("WANDB_API_KEY")


# uncomment if using colab
# from google.colab import userdata
# hf_token = userdata.get("HUGGINGFACEHUB_API_TOKEN")

# import kagglehub
# from kagglehub import auth, login
# kagglehub.login(validate_credentials=True)

In [89]:
import wandb
from huggingface_hub import whoami, login

# Authenticate against Hugging Face and Weights & Biases with the tokens read above.
# Alternative for Hugging Face: !huggingface-cli login --token {mytoken}
login(token = hf_token)
wandb.login(key=wandb_token)
# !chmod 600 ~/.netrc
# !cat ~/.netrc

# Display the logged-in Hugging Face account to confirm the token works.
# NOTE(review): this output may expose account details — clear it before sharing the notebook.
JSON(whoami())

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


<IPython.core.display.JSON object>

### **Mount Drive**

In [None]:
# Uncomment if you need to mount drive in kaggle

# !apt-get install -y -qq software-properties-common
# !add-apt-repository -y ppa:alessandro-strada/ppa
# !apt-get update -qq
# !apt-get install -y -qq google-drive-ocamlfuse
# !pip install pydrive

In [25]:
# Uncomment if you need to mount drive in kaggle

# from pydrive.auth import GoogleAuth
# from pydrive.drive import GoogleDrive
# from google.colab import auth
# from oauth2client.client import GoogleCredentials

# auth.authenticate_user()
# gauth = GoogleAuth()
# gauth.credentials = GoogleCredentials.get_application_default()
# drive = GoogleDrive(gauth)
# SECURITY: never store an OAuth code / client secret in the notebook. Read it at run time
# (e.g. from the Kaggle secret store) and revoke the value that was previously written on this line.
# your_client_secret = "<REDACTED>"

### **Caching Directories**

In [90]:
# Storage locations under /kaggle/working/drive/MyDrive, published as environment
# variables so that later cells (and `!` subprocesses) can read them.
# FULL DATA HERE ==> /kaggle/working/drive/MyDrive/llm-finetuning/datasets_cache
# NOTE(review): transformers/datasets are imported earlier in this notebook; HF_HOME may
# need to be set before those imports to affect the current kernel — confirm.
os.environ.update({
    'HF_HOME': '/kaggle/working/drive/MyDrive/llm-finetuning/hf_home',
    'DATASETS': '/kaggle/working/drive/MyDrive/llm-finetuning/datasets_samples',
    'LLAMAFACTORY_OUTPUT': '/kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output',
})

# Create each directory if it does not exist yet.
for env_key in ('HF_HOME', 'DATASETS', 'LLAMAFACTORY_OUTPUT'):
    os.makedirs(os.environ[env_key], exist_ok=True)

In [91]:
# Reduce TensorFlow's C++ log output (the cuFFT/cuDNN/cuBLAS registration messages seen above).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# XLA option passed through the environment; presumably limits XLA GPU compilation parallelism to 1 — confirm.
os.environ["XLA_FLAGS"] = "--xla_gpu_force_compilation_parallelism=1"

---
## Load Dataset
---

### **Load Data From Kaggle**

**Download the dataset from Kaggle**

In [28]:
# Uncomment if using Colab

# data_path = kagglehub.dataset_download('hebamo7amed/real-estate-data-for-llm-fine-tuning')
# tabular_data_path = f"{data_path}/tabular_data"
# text_data_path = f"{data_path}/text_data"
# text_data_path

**Read Datasets From Kaggle**

In [None]:
# with open(f"{text_data_path}/text_train_data.jsonl", "r") as f:
#    train_data = json.load(f)

# with open(f"{text_data_path}/text_val_data.jsonl", "r") as f:
#   val_data = json.load(f)

# with open(f"{text_data_path}/sample_50.jsonl", "r") as f:
#   sample_data = json.load(f)

# print("Training data size = ", len(train_data))
# print("Validation data size = ", len(val_data))
# print("Sample data size = ", len(sample_data))

Training data size =  2201006
Validation data size =  22233
Sample data size =  50


### **Load Full Dataset from HuggingFace Hub**


This dataset was created from structured real estate data and uploaded to Hugging Face in the first notebook. It is formatted for instruction-based fine-tuning an LLM.

In [42]:
repo_id = "heba1998/real-estate-data-for-llm-fine-tuning"

# Load the two JSONL files of the Hub repo as the "train" and "validation" splits.
dataset = load_dataset(repo_id,
    data_files={
    "train": "text_train_data.jsonl",
    "validation": "text_val_data.jsonl"
    },
    # NOTE(review): 'DATASETS_CACHE' is not set in any visible cell; define it before re-enabling the next line.
    # cache_dir = os.environ['DATASETS_CACHE'],
    trust_remote_code = True
)

# Show the split names, columns and row counts.
dataset

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

text_train_data.jsonl:   0%|          | 0.00/1.66G [00:00<?, ?B/s]

text_val_data.jsonl:   0%|          | 0.00/16.7M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'query', 'response'],
        num_rows: 2201006
    })
    validation: Dataset({
        features: ['id', 'query', 'response'],
        num_rows: 22233
    })
})

In [None]:
# Convenience handles for the two splits of the `dataset` object currently in scope.
train_data = dataset['train']
val_data = dataset['validation']

### **Load Dataset Sample from Hugging Face Hub**

A data sample that was created from structured real estate data and uploaded to Hugging Face in the first notebook. It is formatted for instruction-based fine-tuning an LLM.

In [92]:
from datasets import load_dataset
# NOTE: this rebinds `dataset` — from here on it refers to the sample repo, not the full dataset loaded earlier.
dataset = load_dataset(
    path  ='heba1998/real-estate-data-sample-for-llm-fine-tuning' # data sample
)
# Show the split names, columns and row counts.
dataset

README.md:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['system', 'instruction', 'input', 'output', 'history'],
        num_rows: 5000
    })
    validation: Dataset({
        features: ['system', 'instruction', 'input', 'output', 'history'],
        num_rows: 200
    })
})

### **Save to Working directory**

In [51]:
import json
from datasets import DatasetDict

# `dataset` is the DatasetDict loaded above; every split is exported to
# <DATASETS>/<name>.jsonl with one JSON object per line.
splits = ["train", "validation"]
names = ["llm_train_data", "llm_val_data"]

for split, name in zip(splits, names):
    output_path = f"{os.environ['DATASETS']}/{name}.jsonl"

    # ensure_ascii=False keeps non-ASCII characters readable instead of \u-escaping them.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(example, ensure_ascii=False) + "\n"
            for example in dataset[split]
        )

In [93]:
# List the directory named by the DATASETS environment variable to confirm the exported files exist.
!ls {os.environ['DATASETS']}

llm_train_data.jsonl  llm_val_data.jsonl


---
## Format Dataset for LlaMa-Factory
---

In [97]:
# Project-local helpers: the response schema (presumably a pydantic model, given
# `model_json_schema`) and the system prompt.
from fine_tuning_helpers import ResponseSchema, SYSTEM_MESSAGE 

# Render the JSON schema of the expected model response.
JSON(ResponseSchema.model_json_schema())

<IPython.core.display.JSON object>

In [99]:
# Show the system prompt text imported from fine_tuning_helpers.
print(SYSTEM_MESSAGE) 

You are an expert in real estate price estimation with experience in the housing market.
Given the following house features, predict the final sale price.
#### Critical notes:
- Some feature values are missing.
- Broker ID and street are encoded for privacy.
- Do not include any introduction or conclusion.


### **Refactoring Datasets for LLaMA-Factory**

Transform training and validation sets into a specific dictionary format that LLaMA-Factory expects for instruction tuning following the [documentation](https://llamafactory.readthedocs.io/en/latest/getting_started/data_preparation.html).

##### **Current Format Example**
```json
{
  "id": 0,
  "query": "A house listing in the USA with the following details: - Status: for_sale - Number of bedrooms: -1 - Number of bathrooms: -1 - Land size: 0.07000000029802322 acres - Address (city, state, zip): Washington, District of Columbia, 20002.0 - House size: -1.0 sqft Your task is to predict the final sale price in $? ### Output schema: {'properties': {'estimated_house_price': {'description': 'Numerical value that expresses the estimated house price', 'example': 85000.0, 'title': 'Estimated House Price', 'type': 'number'}}, 'required': ['estimated_house_price'], 'title': 'ResponseSchema', 'type': 'object'} ### Response: ```json",
  "response": "{'estimated_house_price':2500000.0}"
}
```
##### **Required Template:**
```json
{
  "system": "You are a helpful assistant that predicts house prices based on listing data.",
  "instruction": "A house listing in the USA with the following details: - Status: for_sale - Number of bedrooms: -1 - Number of bathrooms: -1 - Land size: 0.07000000029802322 acres - Address (city, state, zip): Washington, District of Columbia, 20002.0 - House size: -1.0 sqft Your task is to predict the final sale price in $? ### Output schema: {'properties': {'estimated_house_price': {'description': 'Numerical value that expresses the estimated house price', 'example': 85000.0, 'title': 'Estimated House Price', 'type': 'number'}}, 'required': ['estimated_house_price'], 'title': 'ResponseSchema', 'type': 'object'} ### Response: ```json",
  "input": "",
  "output": "{'estimated_house_price':2500000.0}",
  "history": []
}

```

In [49]:
def refactor_data(df, color="green", system_message=None):
    """Convert query/response rows into the record layout used for LLaMA-Factory.

    Args:
        df: Sized iterable of rows (``len(df)`` must work); each row must
            provide ``row['query']`` and ``row['response']``.
        color: Colour of the tqdm progress bar.
        system_message: System prompt stored in every record. When omitted,
            the ``SYSTEM_MESSAGE`` imported from ``fine_tuning_helpers`` is used.

    Returns:
        list[dict]: One dict per row with the keys ``system``, ``instruction``,
        ``input``, ``output`` and ``history``.
    """
    # The previous body read a lowercase global `system_message` that no visible
    # cell defines, so the function raised NameError on a fresh kernel.
    if system_message is None:
        system_message = SYSTEM_MESSAGE

    bar_format = '{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}{postfix}]'
    rows = tqdm(df,
                total=len(df), unit="sample",
                ncols=100, colour=color,
                desc="Format Data for LlaMa Factory",
                bar_format=bar_format)

    # "input" stays empty and "history" is a fresh empty list per record: the
    # whole prompt lives in "instruction" and there are no previous turns.
    return [
        {
            "system": system_message,
            "instruction": row['query'],
            "input": "",
            "output": row['response'],
            "history": [],
        }
        for row in rows
    ]


In [63]:
# Release the refactored lists to free memory. pop() with a default keeps the
# cell re-runnable: a bare `del` raises NameError when the names do not exist
# (fresh kernel, or the cell has already been executed once).
for _name in ("llm_train_data", "llm_val_data"):
    globals().pop(_name, None)

### **Push the data after refactoring to Hugging Face (Optional)**

Optionally push the new datasets (`llm_train_data.jsonl` and `llm_val_data.jsonl`) to Hugging Face after refactoring them to match the LLaMA-Factory template.

> By applying this step you can call it directly in `LLaMA-Factory/data/dataset_info.json` from hugging face.

In [66]:
from datetime import datetime

# Identity of the Hugging Face dataset repo.
username = "heba1998"
data_title = "Real Estate Data Sample For LLM Fine-Tuning"
# Repo slug: the title with spaces turned into hyphens, lower-cased.
repo_name = "-".join(data_title.split(" ")).lower()
# Today's date formatted as YYYY-MM-DD.
date = format(datetime.now(), "%Y-%m-%d")

# Descriptive metadata for the dataset.
metadata = dict(
    title=data_title,
    id=f"{username}/{repo_name}",
    licenses=[dict(name="CC0-1.0")],
    description="Translated Text data generated from tabular US real estate data for LLM fine-tuning",
    version="2.0",
    create_at=date,
    author=username,
    tags=["LLM", "Text Data", "Real Estate", "LlaMa-Factory"],
)


In [70]:
from huggingface_hub import HfApi
api = HfApi()

# Uncomment if first time upload
# api.create_repo(repo_id=f"{username}/{repo_name}",
#                 repo_type="dataset",
#                 private=True)

# Files to upload, each paired with its commit message. The validation entry
# previously named the *training* file in its message (copy-paste slip).
uploads = [
    ("llm_train_data.jsonl",
     "Add 5000 sample of llm_train_data.jsonl after refactoring for llamafactory"),
    ("llm_val_data.jsonl",
     "Add 200 sample llm_val_data.jsonl file after refactoring for llamafactory"),
]

# create_pr=True opens a pull request against `main` for each file instead of
# committing directly, so the files reach `main` only once the PR is merged.
commit_infos = [
    api.upload_file(
        path_or_fileobj=f"{os.environ['DATASETS']}/{file_name}",
        repo_id=f"{username}/{repo_name}",
        repo_type="dataset",
        create_pr=True,
        path_in_repo=file_name,
        commit_message=message,
        revision="main",
    )
    for file_name, message in uploads
]

# Display the CommitInfo of the last upload, as the cell did before.
commit_infos[-1]

CommitInfo(commit_url='https://huggingface.co/datasets/heba1998/real-estate-data-sample-for-llm-fine-tuning/commit/47e26f86ae6d7162b014adc1b8a79204cd0eb546', commit_message='Add 200 sample llm_train_data.jsonl file after refactoring for llamafactory', commit_description='', oid='47e26f86ae6d7162b014adc1b8a79204cd0eb546', pr_url='https://huggingface.co/datasets/heba1998/real-estate-data-sample-for-llm-fine-tuning/discussions/4', repo_url=RepoUrl('https://huggingface.co/datasets/heba1998/real-estate-data-sample-for-llm-fine-tuning', endpoint='https://huggingface.co', repo_type='dataset', repo_id='heba1998/real-estate-data-sample-for-llm-fine-tuning'), pr_revision='refs/pr/4', pr_num=4)

---
## Fine-tune Using LoRA
---

Fine-tune the **`Qwen3-0.6B`** model on a real estate dataset using LoRA (Low-Rank Adaptation) with the **LLaMA-Factory** training framework. Here are the steps:

1. **Load pretrained Model:** Load the pretrained `Qwen/Qwen3-0.6B` model from Hugging Face.
2. **Fine-Tune:** apply LoRA fine-tuning on *all* target layers with rank **64**.
3. **Monitor Training** using W\&B (`wandb`) with metrics and loss plots.
4. **Model Checkpoints:** save the fine-tuned model to Google drive.
5. **Push the resulting fine-tuned model to Hugging Face Hub**, privately.


### **Register the Custom Real Estate Dataset**
Registering two custom datasets (`real_estate_finetune_train` and `real_estate_finetune_val`) into the LLaMA-Factory’s dataset metadata file `dataset_info.json` following these steps:

1. **Loading** the existing dataset configuration file (`dataset_info.json`).
2. **Adding metadata** for your training and validation datasets:
   * Where the data files are stored (paths to `.jsonl` files in the Kaggle cache).
   * Which **columns in your dataset** should be mapped to expected fields:

| Target Field | Source Column in my JSONL |
| ------------ | --------------------------- |
| `prompt`     | `instruction`               |
| `query`      | `input`                     |
| `response`   | `output`                    |
| `system`     | `system`                    |
| `history`    | `history`                   |

3. **Saving** the updated [LLaMA-Factory/data/dataset_info.json](https://github.com/hiyouga/LLaMA-Factory/blob/main/data/dataset_info.json) file, so the training system can use these datasets with the correct structure during fine-tuning.


#### **Call Datasets Locally**

In [100]:
# Inspect LLaMA-Factory's data directory; it contains the dataset_info.json registry.
!ls /kaggle/working/LLaMA-Factory/data/

alpaca_en_demo.json  glaive_toolcall_en_demo.json  mllm_demo.json
alpaca_zh_demo.json  glaive_toolcall_zh_demo.json  mllm_video_audio_demo.json
belle_multiturn      hh_rlhf_en			   mllm_video_demo.json
c4_demo.jsonl	     identity.json		   README.md
dataset_info.json    kto_en_demo.json		   README_zh.md
dpo_en_demo.json     mllm_audio_demo.json	   ultra_chat
dpo_zh_demo.json     mllm_demo_data		   wiki_demo.txt


In [101]:
file_path = "/kaggle/working/LLaMA-Factory/data/dataset_info.json"

# Read the current dataset registry.
with open(file_path, "r") as f:
    dataset_info = json.load(f)

# Register (or overwrite) one entry per split. Both entries use the same column
# mapping; a fresh dict is built for each so the entries do not share state.
for entry_name, jsonl_name in (
    ("real_estate_finetune_train", "llm_train_data.jsonl"),
    ("real_estate_finetune_val", "llm_val_data.jsonl"),
):
    dataset_info[entry_name] = {
        "file_name": f'{os.environ["DATASETS"]}/{jsonl_name}',
        "columns": {
            "prompt": "instruction",
            "query": "input",
            "response": "output",
            "system": "system",
            "history": "history",
        },
    }

# Persist the updated registry.
with open(file_path, "w") as f:
    json.dump(dataset_info, f, indent=2)

> LLaMA-Factory will now **recognize** the real estate fine-tuning datasets (`real_estate_finetune_train`, `real_estate_finetune_val`) and understand how to **parse each field** correctly for training and evaluation.

#### **Call Datasets from Hugging Face Repo**

In [73]:
# file_path = "/kaggle/working/LLaMA-Factory/data/dataset_info.json"

# # Load existing JSON
# with open(file_path, "r") as f:
#     dataset_info = json.load(f)


# # Append new entries
# dataset_info["real_estate_hf_train"] ={
#     "hf_repo": f"{username}/{repo_name}",                    # repo id
#     "hf_file": "llm_train_data.jsonl",                       # HF train file name
#     "file_name": f'{os.environ["DATASETS"]}/llm_train_data.jsonl', # Local train file name
#     "columns": {
#       "prompt": "instruction",
#       "query": "input",
#       "response": "output",
#       "system": "system",
#       "history": "history"
#     }
#   }

# dataset_info["real_estate_hf_val"]= {
#     "hf_repo": f"{username}/{repo_name}",                    # repo id
#     "hf_file": "llm_val_data.jsonl",                         # HF validation file name
#     "file_name": f'{os.environ["DATASETS"]}/llm_val_data.jsonl', # Local validation file name
#     "columns": {
#       "prompt": "instruction",
#       "query": "input",
#       "response": "output",
#       "system": "system",
#       "history": "history"
#     },
#   }

# # Write back the updated JSON
# with open(file_path, "w") as f:
#     json.dump(dataset_info, f, indent=2)

### **Fine-Tune YAML Configuration File**
Fine-tune a lightweight, instruction-tuned LLM (`Qwen/Qwen3-0.6B`) on a real estate-specific dataset, optimizing it to perform better on tasks related to real estate queries or text generation.

In [102]:
# List the LoRA training configs in the examples/train_lora directory.
!ls /kaggle/working/LLaMA-Factory/examples/train_lora

llama3_lora_dpo.yaml	   llama3_lora_sft.sh
llama3_lora_eval.yaml	   llama3_lora_sft.yaml
llama3_lora_kto.yaml	   llama3_preprocess.yaml
llama3_lora_ppo.yaml	   llama4_lora_sft_ds3.yaml
llama3_lora_pretrain.yaml  qwen2_5vl_lora_dpo.yaml
llama3_lora_reward.yaml    qwen2_5vl_lora_sft.yaml
llama3_lora_sft_ds3.yaml   real_estate_qwen3_lora.yaml
llama3_lora_sft_ray.yaml


In [60]:
# !cat /kaggle/working/LLaMA-Factory/examples/train_lora/qwen2_5vl_lora_sft.yaml

In [71]:
%%writefile /kaggle/working/LLaMA-Factory/examples/train_lora/real_estate_qwen3_lora.yaml

# LoRA supervised fine-tuning (SFT) of Qwen/Qwen3-0.6B on the registered real-estate datasets.

### model metadata (config)
model_name_or_path: Qwen/Qwen3-0.6B
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 64
lora_target: all

# Defines the dataset for training and evaluation
# (names must match the entries registered in data/dataset_info.json)
dataset: real_estate_finetune_train # real_estate_sample_data
eval_dataset: real_estate_finetune_val
template: qwen3
cutoff_len: 3500
overwrite_cache: true
preprocessing_num_workers: 16

### Output Directory and Logging Configuration
output_dir: /kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output
logging_steps: 50
save_steps: 200
plot_loss: true
overwrite_output_dir: true
save_only_model: false
save_safetensors: true

### train
# Effective batch size per device = 1 x 8 accumulation steps.
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
# save_steps (200) is a multiple of eval_steps (100), so every saved checkpoint has an eval_loss.
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 100
load_best_model_at_end: true
metric_for_best_model: eval_loss
greater_is_better: false

### logging
report_to: wandb
run_name: real-estate-finetune-qwen0.6B

### Additional Model Saving and Hub Configuration
push_to_hub: true
# NOTE(review): `export_hub_model_id` looks like an export-stage option. The adapter is later
# loaded from "heba1998/llama_factory_output", which suggests the training push used the
# output_dir name instead — confirm, and set `hub_model_id` if a specific repo name is wanted.
export_hub_model_id: "heba1998/Qwen3-0.6B-real-estate-adaptor"
hub_private_repo: true
# hub_strategy: checkpoint
# SECURITY: no `hub_token` here. An access token must never be written into a notebook or
# config file. Provide it through the HF_TOKEN environment variable before launching training
# (e.g. os.environ["HF_TOKEN"] = hf_token), and revoke the token that was previously
# committed on this line — it must be considered leaked.

### Additional settings
save_strategy: steps
save_total_limit: 2
export_device: auto
adapter_folder: /kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/adapters
export_dir: /kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/exported_models
use_cache: false

Overwriting /kaggle/working/LLaMA-Factory/examples/train_lora/real_estate_qwen3_lora.yaml


### **Start Fine-Tuning**

In [103]:
import torch
# Report whether PyTorch can see a CUDA GPU and which device string results from that.
print("CUDA available:", torch.cuda.is_available())
print("Device:", torch.device("cuda" if torch.cuda.is_available() else "cpu"))


CUDA available: True
Device: cuda


In [73]:
!llamafactory-cli train /kaggle/working/LLaMA-Factory/examples/train_lora/real_estate_qwen3_lora.yaml

E0000 00:00:1746746172.253936    2161 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746746172.260589    2161 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[INFO|2025-05-08 23:16:19] llamafactory.cli:143 >> Initializing 2 distributed tasks at: 127.0.0.1:38619
W0508 23:16:21.495000 2181 torch/distributed/run.py:793] 
W0508 23:16:21.495000 2181 torch/distributed/run.py:793] *****************************************
W0508 23:16:21.495000 2181 torch/distributed/run.py:793] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W0508 23:16:21.495000 2181 torch/distributed/run.py:793] *****************************************
E0000 00:00:17467461

In [104]:
# Inspect the files saved for checkpoint-800 in the training output directory.
!ls /kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800

adapter_config.json	   README.md		    tokenizer_config.json
adapter_model.safetensors  rng_state_0.pth	    tokenizer.json
added_tokens.json	   rng_state_1.pth	    trainer_state.json
merges.txt		   scheduler.pt		    training_args.bin
optimizer.pt		   special_tokens_map.json  vocab.json


<div style="
    border: 1px solid #dfe1e5;
    border-radius: 8px;
    padding: 15px;
    margin: 10px 0;
    background-color: var(--colab-primary-surface-color, #f8f9fa);
    color: var(--colab-primary-text-color, #202124);
    box-shadow: 0 1px 2px 0 rgba(60,64,67,0.3), 0 1px 3px 1px rgba(60,64,67,0.15);
">

---
### **Summary**
---

> Best model is **`checkpoint-800`** (score: 0.4182172417640686)

* **Model:** `Qwen/Qwen3-0.6B`
* **Device:** use Kaggle with 2xT4 GPU
* **Precision:** `torch.bfloat16`
* **Model Configuration Highlights**:
   * Hidden size: `1024`
   * Layers: `28`
   * Heads: `16`
   * Max Position Embeddings: `32,768`
   * Attention Dropout: `0.0`
   * Torch Dtype: `bfloat16`
   * Model Type: `qwen3`

* **Dataset**: I took a small sample (5000/200 for train/val) due to a shortage of resources and time.
   * Given Training set: `2,201,006 examples` loaded from `llm_train_data.jsonl`
   * Given Validation set: `22.2k examples` from `llm_val_data.jsonl`

* **Training**:
   * Training examples = 5000
   * Num Epochs = 3
   * Total optimization steps = 936
   * Every 100 steps, an evaluation is run on the 200 validation samples.
   * **Trainable params**: 40,370,176 || all params: 636,420,096 || trainable%: 6.3433

* **Monitoring with Wandb**
  *  📂 Data is saved locally in /kaggle/working/LLaMA-Factory/wandb/run-20250508_231709-vbdtlxqp
  * ⭐️ View project at https://wandb.ai/debi2023-group-3/llamafactory
  * 🚀 View run at https://wandb.ai/debi2023-group-3/llamafactory/runs/vbdtlxqp

</div>

### **Push the Model to Hugging Face**

In [107]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

# task_name: "Real Estate Price Prediction"
# task_type: "text-generation"
base_model = "Qwen/Qwen3-0.6B"
# Adapter repo id on the Hub; the commented local checkpoint path below is an alternative source.
adapter_path= "heba1998/llama_factory_output"
# "/kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800"

# Load base model and merge adapter
# NOTE: `base_model` is rebound here from the model id string to the loaded model object.
base_model = AutoModelForCausalLM.from_pretrained(base_model)
model = PeftModel.from_pretrained(base_model, adapter_path)

# Fold the LoRA weights into the base weights and drop the PEFT wrapper.
# NOTE(review): this cell only merges; no push to the Hub is visible here despite the section title.
model = model.merge_and_unload()
model

config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
          (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
          (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
        (post_attention_layernorm): Qwe

---
## **Model Serving**
---

### **Serving Using LlaMa-Factory**

In [18]:
# List the inference configs in the examples/inference directory.
!ls /kaggle/working/LLaMA-Factory/examples/inference

llama3_full_sft.yaml  llama3.yaml     real_estate_qwen3_0_6B_hf.yaml
llama3_lora_sft.yaml  qwen2_5vl.yaml  real_estate_qwen3_0_6B_vllm.yaml


In [19]:
# Print the bundled LoRA inference example, used as the reference for the config written next.
!cat /kaggle/working/LLaMA-Factory/examples/inference/llama3_lora_sft.yaml

model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
template: llama3
infer_backend: huggingface  # choices: [huggingface, vllm, sglang]
trust_remote_code: true


In [20]:
%%writefile /kaggle/working/LLaMA-Factory/examples/inference/real_estate_qwen3_0_6B_hf.yaml

# Inference config: base Qwen/Qwen3-0.6B plus the LoRA adapter saved at checkpoint-800.
model_name_or_path: Qwen/Qwen3-0.6B
adapter_name_or_path: /kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800
template: qwen3
finetuning_type: lora
# Backend choices listed in the bundled llama3_lora_sft.yaml example: huggingface, vllm, sglang.
infer_backend: huggingface
trust_remote_code: true

Overwriting /kaggle/working/LLaMA-Factory/examples/inference/real_estate_qwen3_0_6B_hf.yaml


In [136]:
!llamafactory-cli infer /kaggle/working/LLaMA-Factory/examples/inference/real_estate_qwen3_0_6B_hf.yaml --share true

E0000 00:00:1746755819.373180    2901 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746755819.380552    2901 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Unknown command: infer.
----------------------------------------------------------------------
| Usage:                                                             |
|   llamafactory-cli api -h: launch an OpenAI-style API server       |
|   llamafactory-cli chat -h: launch a chat interface in CLI         |
|   llamafactory-cli eval -h: evaluate models                        |
|   llamafactory-cli export -h: merge LoRA adapters and export model |
|   llamafactory-cli train -h: train models                          |
|   llamafactory-cli webchat -h: launch a chat interface in Web UI   |
|   llamafactory-cli webui: launch LlamaBoard          

In [137]:
!llamafactory-cli webui /kaggle/working/LLaMA-Factory/examples/inference/real_estate_qwen3_0_6B_hf.yaml

E0000 00:00:1746755852.874652    2921 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746755852.881414    2921 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Visit http://ip:port for Web UI, e.g., http://127.0.0.1:7860
* Running on local URL:  http://0.0.0.0:7860

To create a public link, set `share=True` in `launch()`.
^C
Keyboard interruption in main thread... closing server.


### **Serving Using vLLM**

In [None]:
# !rm -r /kaggle/working/vllm
# !nvidia-smi
# !pip cache purge
# !pip install ninja
# !export XLA_PYTHON_CLIENT_PREALLOCATE=false
# !export TF_CPP_MIN_LOG_LEVEL=2

In [None]:
# virtual env
!export UV_LINK_MODE=copy
!uv venv vllm --python 3.12 --seed
!source vllm/bin/activate
!pip install --upgrade pip

!uv pip install --system -qU transformers==4.48.3 datasets==3.2.0 optimum==1.24.0
!uv pip install --system -qU vllm==0.7.2 

In [None]:
# import torch
# print("PyTorch version:", torch.__version__)
# print("CUDA available:", "cuda" if torch.cuda.is_available() else "cpu" )

# from vllm import LLM
# print("vLLM imported successfully")

PyTorch version: 2.5.1+cu124
CUDA available: cpu
vLLM imported successfully


In [None]:
def translate_data(row):
    """Build the text prompt sent to the LLM for one house listing.

    `row` is indexed by the keys 'status', 'bed', 'bath', 'acre_lot', 'city',
    'state', 'zip_code' and 'house_size'. The returned string starts with a
    newline, lists the features one per line, and ends with the JSON schema of
    ResponseSchema followed by "Response:".
    """
    # NOTE(review): the wording differs from the training prompt example shown earlier in
    # this notebook ("Number of bedrooms", "Address (city, state, zip)") — confirm this is intended.
    prompt_lines = [
        "A house listing in the USA with the following details:",
        f"- Status: {row['status']}",
        f"- Bedroom: {row['bed']}",
        f"- Bathroom: {row['bath']}",
        f"- Land size: {row['acre_lot']} acres",
        f"- Location: {row['city']}, {row['state']} {row['zip_code']}",
        f"- House size: {row['house_size']} sqft",
        "Predict the final sale price in $.",
        "Response format (JSON):",
        json.dumps(ResponseSchema.model_json_schema(), indent=2),
        "Response:",
    ]
    body = "\n".join(prompt_lines)
    return f"\n{body}"

In [None]:
base_model_id = "Qwen/Qwen3-0.6"
adapter_model_id = "/kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800"
device = "cuda" if torch.cuda.is_available() else "cpu"

!vllm serve "{base_model_id}" \
              --dtype=half \
              # --gpu-memory-utilization 0.8 \
              --device auto \
              --max_lora_rank 64 \
              --enable-lora \
              --lora-modules real-estate-qwen="{adapter_model_id}"

---

In [None]:
# import requests

# vllm_model_id = "real-estate-qwen3-lora"

# llm_response = requests.post("http://localhost:8000/v1/completions", json={
#     "model": vllm_model_id,
#     "prompt": prompt,
#     "max_tokens": 1000,
#     "temperature": 0.3
# })

# llm_response.json()

NameError: name 'prompt' is not defined

In [None]:
# from vllm import LLM, SamplingParams
# from vllm.lora.request import LoRARequest

# adaptor_path = "/kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800"
# # "heba1998/llama_factory_output/last-checkpoint"
# # "/kaggle/working/drive/MyDrive/llm-finetuning/llama_factory_output/checkpoint-800"

# llm = LLM(
#     model="Qwen/Qwen3-0.6", 
#     enable_lora=True,
#     lora_modules=[{
#     "lora_name": "real_estate_qwen3",
#     "lora_path": adaptor_path,
#     "lora_scope": "all"
#     }]
#  )
# sampling_params = SamplingParams(temperature=0.7,top_p=0.9)
# lora_request = LoRARequest("real_estate_qwen3", 1, adaptor_path)

RuntimeError: Failed to infer device type

In [None]:
# from typing import Dict
# import json_repair

# def predict_price(prompt):
#   """Predict price using vLLM"""
#     if tabular:
#         prompt = translate_data(prompt)
    
#     outputs = llm.generate([prompt], 
#                          sampling_params, 
#                          lora_request=real_estate_adaptor)
#     response_dict = json_repair.loads(outputs[0].outputs[0].text)
#     validated = ResponseSchema(**response_dict)
#     price = validated.model_dump()["estimated_house_price"] if validated else 0
    
#     return validated.model_dump()

In [None]:
# import streamlit as st

# st.title("Real Estate Price Prediction (vLLM)")

# # Input fields
# status = st.selectbox("Status", ["For Sale", "Sold", "Pending"])
# bed = st.number_input("Bedroom", min_value=0.0, step=1.0)
# bath = st.number_input("Bathroom", min_value=0.0, step=1.0)
# acre_lot = st.number_input("Land size (acres)", min_value=0.0, step=0.1)
# house_size = st.number_input("House size (sqft)", min_value=0.0, step=1.0)
# city = st.text_input("City")
# state = st.text_input("State")
# zip_code = st.text_input("Zip code")

# features = {
#             "status": status,
#             "bed": bed,
#             "bath": bath,
#             "acre_lot": acre_lot,
#             "house_size": house_size,
#             "city": city,
#             "state": state,
#             "zip_code": zip_code
#         }

# # Predict button
# if st.button("Predict price"):
#     with st.spinner("Predicting..."):
#         result = predict_price(features)
#     st.json(result)