36 changes: 23 additions & 13 deletions README.md
@@ -1,7 +1,7 @@
<div align="center">
<img src="docs/images/1.png" alt="QuantLLM Logo" />

# 🚀 QuantLLM v2.0
# 🚀 QuantLLM v2.1 (pre-release)

**The Ultra-Fast LLM Quantization & Export Library**

@@ -52,9 +52,12 @@ model = AutoModelForCausalLM.from_pretrained(
```python
from quantllm import turbo

model = turbo("meta-llama/Llama-3-8B") # Auto-quantizes
model = turbo(
"meta-llama/Llama-3-8B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
) # Auto-quantizes
model.generate("Hello!") # Generate text
model.export("gguf", quantization="Q4_K_M") # Export to GGUF
model.export() # Export to GGUF with shared config
```

---
@@ -77,14 +80,17 @@ pip install "quantllm[full] @ git+https://github.com/codewithdark-git/QuantLLM.g
from quantllm import turbo

# Load with automatic optimization
model = turbo("meta-llama/Llama-3.2-3B")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

# Generate text
response = model.generate("Explain quantum computing simply")
print(response)

# Export to GGUF
model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
model.export("gguf", "model.Q4_K_M.gguf")
```

**QuantLLM automatically:**
@@ -102,11 +108,14 @@ model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
One unified interface for everything:

```python
model = turbo("mistralai/Mistral-7B")
model = turbo(
"mistralai/Mistral-7B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.generate("Hello!")
model.finetune(data, epochs=3)
model.export("gguf", quantization="Q4_K_M")
model.push("user/repo", format="gguf")
model.export()
model.push("user/repo")
```

### ⚡ Performance Optimizations
@@ -133,7 +142,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S

```
╔════════════════════════════════════════════════════════════╗
║ 🚀 QuantLLM v2.0.0
║ 🚀 QuantLLM v2.1.0rc1
║ Ultra-fast LLM Quantization & Export ║
║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║
╚════════════════════════════════════════════════════════════╝
@@ -148,7 +157,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S
Auto-generates model cards with YAML frontmatter, usage examples, and a "Use this model" button:

```python
model.push("user/my-model", format="gguf", quantization="Q4_K_M")
model.push("user/my-model")
```

---
@@ -195,7 +204,10 @@ model.export("safetensors", "./model-hf/")
```python
from quantllm import turbo

model = turbo("meta-llama/Llama-3.2-3B")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

# Simple generation
response = model.generate(
@@ -267,8 +279,6 @@ model = turbo("meta-llama/Llama-3.2-3B")
# Push with auto-generated model card
model.push(
"your-username/my-model",
format="gguf",
quantization="Q4_K_M",
license="apache-2.0"
)
```
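The thread running through these README changes is that `turbo()` now takes a shared `config` dict whose entries act as defaults for later `export()` and `push()` calls, with explicit keyword arguments still taking precedence. A minimal sketch of that resolution order — the helper `_resolve_option` is illustrative only, not part of the QuantLLM API:

```python
from typing import Any, Dict, Optional

def _resolve_option(
    explicit: Optional[str],
    config: Dict[str, Any],
    key: str,
    default: Optional[str] = None,
) -> Optional[str]:
    """Explicit argument wins, then the shared config entry, then the default."""
    return explicit if explicit is not None else config.get(key, default)

# export() with no arguments resolves config["format"] / config["quantization"];
# push() resolves config["push_format"] / config["push_quantization"].
config = {"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}
assert _resolve_option(None, config, "format") == "gguf"
assert _resolve_option("onnx", config, "format") == "onnx"  # override still wins
```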
7 changes: 5 additions & 2 deletions docs/api/gguf.md
@@ -10,8 +10,11 @@ Export models to GGUF format for llama.cpp, Ollama, and LM Studio.
from quantllm import turbo, convert_to_gguf, quantize_gguf

# Method 1: Via TurboModel
model = turbo("meta-llama/Llama-3.2-3B")
model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.export("gguf", "model.Q4_K_M.gguf")

# Method 2: Direct conversion
convert_to_gguf("meta-llama/Llama-3.2-3B", "model.Q4_K_M.gguf", quant_type="Q4_K_M")
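The exported `model.Q4_K_M.gguf` can then be served by any llama.cpp-compatible runtime. A short usage sketch with `llama-cpp-python`, which is a separate install rather than a QuantLLM dependency:

```python
# pip install llama-cpp-python  (separate from QuantLLM)
from llama_cpp import Llama

llm = Llama(model_path="model.Q4_K_M.gguf", n_ctx=2048)
output = llm("Explain quantum computing simply", max_tokens=128)
print(output["choices"][0]["text"])
```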
20 changes: 12 additions & 8 deletions docs/api/hub.md
@@ -10,8 +10,11 @@ Push models to HuggingFace Hub with auto-generated model cards.
from quantllm import turbo, QuantLLMHubManager

# Method 1: TurboModel.push() (Recommended)
model = turbo("meta-llama/Llama-3.2-3B")
model.push("user/my-model", format="gguf", quantization="Q4_K_M")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.push("user/my-model")

# Method 2: QuantLLMHubManager (Advanced)
manager = QuantLLMHubManager("user/my-model", hf_token="hf_...")
@@ -30,7 +33,7 @@ def push(
self,
repo_id: str,
token: Optional[str] = None,
format: str = "safetensors",
format: Optional[str] = None,
quantization: Optional[str] = None,
license: str = "apache-2.0",
commit_message: str = "Upload model via QuantLLM",
@@ -44,7 +47,7 @@ def push(
|-----------|------|---------|-------------|
| `repo_id` | str | required | HuggingFace repo ID (user/model) |
| `token` | str | None | HF token (or use HF_TOKEN env) |
| `format` | str | "safetensors" | Export format |
| `format` | str | None | Export format (uses `config["push_format"]` when omitted) |
| `quantization` | str | None | Quantization type |
| `license` | str | "apache-2.0" | License type |
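
Because `token` defaults to `None`, authentication can come from the `HF_TOKEN` environment variable instead of the call site, and `format` can be omitted entirely once `config["push_format"]` is set. A small sketch of that minimal call — the token value is a placeholder:

```python
import os

os.environ["HF_TOKEN"] = "hf_..."  # placeholder; or export it in your shell

# No token/format arguments: the token comes from HF_TOKEN,
# format and quantization from the shared config.
model.push("user/my-model")
```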

@@ -62,13 +65,14 @@ def push(
```python
from quantllm import turbo

model = turbo("meta-llama/Llama-3.2-3B")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

# Push as GGUF
model.push(
"your-username/llama-3.2-3b-gguf",
format="gguf",
quantization="Q4_K_M"
"your-username/llama-3.2-3b-gguf"
)

# Push as ONNX
4 changes: 3 additions & 1 deletion docs/api/turbo.md
@@ -14,6 +14,7 @@ def turbo(
max_length: Optional[int] = None,
device: Optional[str] = None,
dtype: Optional[str] = None,
config: Optional[Dict[str, Any]] = None,
quantize: bool = True,
trust_remote_code: bool = False,
verbose: bool = True,
@@ -32,6 +33,7 @@ def turbo(
| `max_length` | int | auto | Maximum context length |
| `device` | str | auto | Device ("cuda", "cpu", "cuda:0", "auto") |
| `dtype` | str | auto | Data type ("float16", "bfloat16") |
| `config` | dict | None | Shared export/push defaults (`format`, `quantization`, `push_format`, `push_quantization`) |
| `quantize` | bool | True | Whether to apply quantization |
| `trust_remote_code` | bool | False | Trust remote code in model |
| `verbose` | bool | True | Show loading progress and stats |
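
The export and push defaults are independent keys, so a model can be exported in one format locally while pushed in another. An illustrative call using all four documented config keys (the values are examples, not requirements):

```python
from quantllm import turbo

model = turbo(
    "meta-llama/Llama-3.2-3B",
    config={
        "format": "gguf",               # default for model.export()
        "quantization": "Q4_K_M",       # default export quantization
        "push_format": "gguf",          # default for model.push()
        "push_quantization": "Q4_K_M",  # default push quantization
    },
)
```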
@@ -124,7 +126,7 @@ When `verbose=True` (default), you'll see:

```
╔════════════════════════════════════════════════════════════╗
║ 🚀 QuantLLM v2.0.0
║ 🚀 QuantLLM v2.1.0rc1
╚════════════════════════════════════════════════════════════╝

📊 Loading: meta-llama/Llama-3.2-3B
20 changes: 11 additions & 9 deletions docs/api/turbomodel.md
@@ -232,23 +232,27 @@ Export the model to various formats.
```python
def export(
self,
format: str,
output_path: str,
format: Optional[str] = None,
output_path: Optional[str] = None,
quantization: Optional[str] = None,
**kwargs
) -> str
```

| Parameter | Type | Description |
|-----------|------|-------------|
| `format` | str | "gguf", "onnx", "mlx", "safetensors" |
| `output_path` | str | Output file or directory |
| `format` | str | "gguf", "onnx", "mlx", "safetensors" (optional, uses shared config) |
| `output_path` | str | Output file or directory (optional) |
| `quantization` | str | Quantization type (format-specific) |

**Examples:**
```python
# GGUF
model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.export()

# ONNX
model.export("onnx", "./model-onnx/")
@@ -269,7 +273,7 @@ def push(
self,
repo_id: str,
token: Optional[str] = None,
format: str = "safetensors",
format: Optional[str] = None,
quantization: Optional[str] = None,
license: str = "apache-2.0",
commit_message: str = "Upload model via QuantLLM",
@@ -281,9 +285,7 @@ def push(
```python
# Push as GGUF
model.push(
"your-username/my-model",
format="gguf",
quantization="Q4_K_M"
"your-username/my-model"
)

# Push as MLX
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -3,7 +3,7 @@
project = 'QuantLLM'
copyright = '2024, Dark Coder'
author = 'Dark Coder'
release = '2.0.0'
release = '2.1.0rc1'

# Extensions
extensions = [
@@ -21,7 +21,7 @@
# HTML output
html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_title = 'QuantLLM v2.0'
html_title = 'QuantLLM v2.1'
html_logo = 'images/logo.png'
html_favicon = 'images/favicon.ico'

4 changes: 2 additions & 2 deletions docs/guide/finetuning.md
@@ -193,13 +193,13 @@ print("Fine-tuned:", model.generate("prompt"))

```python
# Export to GGUF
model.export("gguf", "finetuned.Q4_K_M.gguf", quantization="Q4_K_M")
model.export("gguf", "finetuned.Q4_K_M.gguf")

# Export to SafeTensors
model.export("safetensors", "./finetuned-model/")

# Push to HuggingFace
model.push("your-username/finetuned-model", format="gguf")
model.push("your-username/finetuned-model")
```
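With the shared config set at load time, the fine-tune → export → push flow above needs no per-call format arguments. A condensed sketch, assuming `data` is a dataset in whatever form `finetune()` accepts:

```python
from quantllm import turbo

model = turbo(
    "meta-llama/Llama-3.2-3B",
    config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.finetune(data, epochs=3)                 # data: your training set
model.export("gguf", "finetuned.Q4_K_M.gguf")  # quantization from config
model.push("your-username/finetuned-model")    # format from config
```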

### Save and Load
Expand Down
6 changes: 4 additions & 2 deletions docs/guide/gguf-export.md
@@ -130,10 +130,12 @@ print(output["choices"][0]["text"])
Export and push in one step:

```python
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)
model.push(
"your-username/my-model-gguf",
format="gguf",
quantization="Q4_K_M",
license="apache-2.0"
)
```
45 changes: 15 additions & 30 deletions docs/guide/hub-integration.md
@@ -11,14 +11,15 @@ The easiest way to share your model:
```python
from quantllm import turbo

model = turbo("meta-llama/Llama-3.2-3B")
model = turbo(
"meta-llama/Llama-3.2-3B",
config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"},
)

# Push with auto-generated model card
model.push(
"your-username/my-model",
token="hf_...",
format="gguf",
quantization="Q4_K_M"
token="hf_..."
)
```

@@ -49,34 +50,18 @@ model.push("user/repo", token="hf_...")
```python
from quantllm import turbo

model = turbo("meta-llama/Llama-3.2-3B")

# Push as GGUF (for Ollama, llama.cpp, LM Studio)
model.push(
"your-username/my-model-gguf",
format="gguf",
quantization="Q4_K_M",
license="apache-2.0"
model = turbo(
"meta-llama/Llama-3.2-3B",
config={
"format": "gguf",
"quantization": "Q4_K_M",
"push_format": "gguf",
},
)

# Push as ONNX
model.push(
"your-username/my-model-onnx",
format="onnx"
)

# Push as MLX (Apple Silicon)
model.push(
"your-username/my-model-mlx",
format="mlx",
quantization="4bit"
)

# Push as SafeTensors (default)
model.push(
"your-username/my-model",
format="safetensors"
)
# Uses shared config defaults
model.export()
model.push("your-username/my-model-gguf", license="apache-2.0")
```
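
Explicit keyword arguments still override the shared defaults, so one loaded model can be pushed in several formats without rebuilding the config — a sketch reusing the format names documented above:

```python
# Per-call overrides win over config["push_format"].
model.push("your-username/my-model-onnx", format="onnx")
model.push("your-username/my-model", format="safetensors")
```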

### Method 2: QuantLLMHubManager (Advanced)