---

📌 **This notebook has been updated in [jhj0517/finetuning-notebooks](https://github.com/jhj0517/finetuning-notebooks) repository!**

## Version : 1.0.0
---

### What is the GGUF Model Format?
The GGUF format was created by the llama.cpp team to support a variety of quantization types.

For more information about the GGUF format, see:
- https://huggingface.co/docs/transformers/gguf#gguf-and-interaction-with-transformers

In [None]:
#@title #(Optional) Check GPU
#@markdown You can check your GPU setup before start.
# Shell out to `nvidia-smi`; whatever it reports for this runtime is printed below the cell.
!nvidia-smi

In [1]:
#@title #1. Install Dependencies
#@markdown This notebook is powered by https://github.com/ggml-org/llama.cpp
!git clone https://github.com/ggml-org/llama.cpp.git
%cd llama.cpp

Cloning into 'llama.cpp'...
remote: Enumerating objects: 44397, done.[K
remote: Counting objects: 100% (64/64), done.[K
remote: Compressing objects: 100% (44/44), done.[K
remote: Total 44397 (delta 40), reused 20 (delta 20), pack-reused 44333 (from 3)[K
Receiving objects: 100% (44397/44397), 91.25 MiB | 16.12 MiB/s, done.
Resolving deltas: 100% (31991/31991), done.
/content/llama.cpp


In [3]:
#@title # 2. (Optional) Mount Google Drive

#@markdown If your model file is in the Google Drive, you can mount the Google Drive.

from google.colab import drive
# NOTE: `os` is not used in this cell.
import os
# Mount Drive at /content/drive; the default paths in the conversion cell point below this directory.
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
#@title # 3. Convert to GGUF
import os

#@markdown ### Path Configuration
# Source model directory; passed to the conversion script as its positional argument.
BASE_MODEL_PATH = "/content/drive/MyDrive/finetuning-notebooks/flux/base_models/FLUX-dev/transformer" #@param {type:"string"}
# Directory that receives the generated .gguf files (created just below if missing).
OUTPUT_DIR_PATH = "/content/drive/MyDrive/finetuning-notebooks/flux/base_models/GGUF" #@param {type:"string"}
# Base name of the outputs: "<name>-fp16.gguf" and a lower-cased "<name>-<quant>.gguf".
GGUF_NAME = "Flux.1-dev" #@param {type:"string"}

# exist_ok=True keeps the cell re-runnable when the directory already exists.
os.makedirs(OUTPUT_DIR_PATH, exist_ok=True)

# NOTE: `os` is imported a second time here and `tempfile` is not used in the code below.
import os
import tempfile
from pathlib import Path

# Converter script name; it is invoked as `python <name>`, i.e. relative to the
# current working directory.
CONVERSION_SCRIPT = "convert_hf_to_gguf.py"

def generate_importance_matrix_command(fp16_path, train_data_path, imatrix_path):
    """Build the shell command that generates an importance matrix.

    Nothing is executed here; the command is only assembled and returned.

    Args:
        fp16_path: Path of the fp16 GGUF file, first positional argument.
        train_data_path: Path of the calibration text file, second argument.
        imatrix_path: Path of the importance-matrix file, third argument.

    Returns:
        str: ``python generate_imatrix.py <fp16> <train data> <imatrix>`` with
        every path shell-quoted.
    """
    import shlex

    print("Importance matrix generation command requested...")
    # Quote each path so that directories containing spaces or shell
    # metacharacters survive the trip through the shell. Paths made only of
    # safe characters are emitted unchanged.
    quoted_paths = [shlex.quote(str(path)) for path in (fp16_path, train_data_path, imatrix_path)]
    # NOTE(review): `generate_imatrix.py` is not created anywhere in this
    # notebook - confirm the script exists in the working directory.
    command = "python generate_imatrix.py " + " ".join(quoted_paths)
    return command

def process_model_command(model_dir_path,
                          model_name,
                          output_dir,
                          q_method,
                          use_imatrix,
                          imatrix_q_method,
                          train_data_file):
    """Build the shell commands that convert a model to GGUF and quantize it.

    Nothing is executed here. The returned commands are independent shell
    command lines that must be run one after another, in order.

    Args:
        model_dir_path: Model directory handed to ``CONVERSION_SCRIPT``.
        model_name: Base name for the output files. The fp16 file keeps the
            name as given; the quantized file uses the lower-cased name.
        output_dir: Directory in which all output paths are placed.
        q_method: Quantization type used when ``use_imatrix`` is false.
        use_imatrix: When true, an importance-matrix generation command is
            added and ``llama-quantize`` is called with ``--imatrix``.
        imatrix_q_method: Quantization type used when ``use_imatrix`` is true.
        train_data_file: Calibration data for the importance matrix. May be a
            path (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. A falsy value selects the bundled fallback path. Only
            read when ``use_imatrix`` is true.

    Returns:
        list[str]: ``[convert, quantize]`` or, with ``use_imatrix``,
        ``[convert, imatrix, quantize]``.
    """
    import os
    import shlex

    def _to_shell(args):
        # Quote every argument so paths with spaces or shell metacharacters
        # stay a single argument; safe strings pass through unchanged.
        return " ".join(shlex.quote(str(arg)) for arg in args)

    out_dir = Path(output_dir)
    fp16_path = str(out_dir / f"{model_name}-fp16.gguf")

    # Step 1: convert the source model into an fp16 GGUF file.
    commands_list = [
        _to_shell([
            "python", CONVERSION_SCRIPT,
            str(model_dir_path),
            "--outtype", "f16",
            "--outfile", fp16_path,
        ])
    ]

    imatrix_path = str(out_dir / "imatrix.dat")
    if use_imatrix:
        # Step 2 (optional): importance matrix computed from calibration text.
        if not train_data_file:
            # Fallback calibration dataset.
            # NOTE(review): this is a relative path; the install cell already
            # changes into llama.cpp, so verify that it resolves from there.
            train_data_path = "llama.cpp/groups_merged.txt"
        elif isinstance(train_data_file, (str, os.PathLike)):
            # A plain path: use it in full. (`Path.name` would be only the
            # basename and `str` has no `.name` at all.)
            train_data_path = os.fspath(train_data_file)
        else:
            # File-like object: the path is carried in `.name`.
            train_data_path = train_data_file.name

        # Raw (unquoted) paths are passed on; quoting is that helper's concern.
        commands_list.append(
            generate_importance_matrix_command(fp16_path, train_data_path, imatrix_path)
        )

    # Step 3: quantize the fp16 file. The quantization type and the output
    # file name both depend on whether an importance matrix is used.
    # NOTE(review): `llama-quantize` is invoked by bare name, so it has to be
    # built and on PATH; this notebook only clones llama.cpp - confirm.
    if use_imatrix:
        quant_type = imatrix_q_method
        quantized_gguf_name = f"{model_name.lower()}-{quant_type.lower()}-imat.gguf"
        quantize_args = ["llama-quantize", "--imatrix", imatrix_path]
    else:
        quant_type = q_method
        quantized_gguf_name = f"{model_name.lower()}-{quant_type.lower()}.gguf"
        quantize_args = ["llama-quantize"]

    quantized_gguf_path = str(out_dir / quantized_gguf_name)
    quantize_args += [fp16_path, quantized_gguf_path, quant_type]
    commands_list.append(_to_shell(quantize_args))
    return commands_list


command_list = process_model_command(
      model_dir_path=BASE_MODEL_PATH,
      model_name=GGUF_NAME,
      output_dir=OUTPUT_DIR_PATH,
      q_method="Q8_0",
      use_imatrix=False,
      imatrix_q_method="Q4_0",
      train_data_file=None
)
command = ' '.join(command_list)
!{command}





# command = [
#     "python", "convert_hf_to_gguf.py",
#     BASE_MODEL_PATH
# ]

# # ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"]
# #@markdown ### Quantization Type
# Q8_0 = True # @param {type:"boolean"}

# q8_0_path = os.path.join(OUTPUT_DIR_PATH, f"{OUTPUT_GGUF_NAME}-Q8_0.gguf")
# q_command = command + [
#     f"--outfile {q8_0_path}",
#     f"--outtype q8_0"
# ]
# q_command = ' '.join(q_command)
# !{q_command}




usage: convert_hf_to_gguf.py [-h] [--vocab-only] [--outfile OUTFILE]
                             [--outtype {f32,f16,bf16,q8_0,tq1_0,tq2_0,auto}] [--bigendian]
                             [--use-temp-file] [--no-lazy] [--model-name MODEL_NAME] [--verbose]
                             [--split-max-tensors SPLIT_MAX_TENSORS]
                             [--split-max-size SPLIT_MAX_SIZE] [--dry-run]
                             [--no-tensor-first-split] [--metadata METADATA]
                             [--print-supported-models]
                             [model]
convert_hf_to_gguf.py: error: unrecognized arguments: llama-quantize /content/drive/MyDrive/finetuning-notebooks/flux/base_models/GGUF/Flux.1-dev-fp16.gguf /content/drive/MyDrive/finetuning-notebooks/flux/base_models/GGUF/flux.1-dev-q8_0.gguf Q8_0
