diff --git a/README.md b/README.md
index b800541..0555b85 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
QuantLLM Logo - # 🚀 QuantLLM v2.0 + # 🚀 QuantLLM v2.1 (pre-release) **The Ultra-Fast LLM Quantization & Export Library** @@ -52,9 +52,12 @@ model = AutoModelForCausalLM.from_pretrained( ```python from quantllm import turbo -model = turbo("meta-llama/Llama-3-8B") # Auto-quantizes +model = turbo( + "meta-llama/Llama-3-8B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Auto-quantizes model.generate("Hello!") # Generate text -model.export("gguf", quantization="Q4_K_M") # Export to GGUF +model.export() # Export to GGUF with shared config ``` --- @@ -77,14 +80,17 @@ pip install "quantllm[full] @ git+https://github.com/codewithdark-git/QuantLLM.g from quantllm import turbo # Load with automatic optimization -model = turbo("meta-llama/Llama-3.2-3B") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Generate text response = model.generate("Explain quantum computing simply") print(response) # Export to GGUF -model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") +model.export("gguf", "model.Q4_K_M.gguf") ``` **QuantLLM automatically:** @@ -102,11 +108,14 @@ model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") One unified interface for everything: ```python -model = turbo("mistralai/Mistral-7B") +model = turbo( + "mistralai/Mistral-7B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) model.generate("Hello!") model.finetune(data, epochs=3) -model.export("gguf", quantization="Q4_K_M") -model.push("user/repo", format="gguf") +model.export() +model.push("user/repo") ``` ### ⚡ Performance Optimizations @@ -133,7 +142,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S ``` ╔════════════════════════════════════════════════════════════╗ -║ 🚀 QuantLLM v2.0.0 ║ +║ 🚀 QuantLLM v2.1.0rc1 ║ ║ Ultra-fast LLM Quantization & Export ║ ║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║ ╚════════════════════════════════════════════════════════════╝ @@ -148,7 +157,7 @@ Llama 2/3, Mistral, Mixtral, Qwen 1/2, Phi 1/2/3, Gemma, Falcon, DeepSeek, Yi, S Auto-generates model cards with YAML frontmatter, usage examples, and "Use this model" button: ```python -model.push("user/my-model", format="gguf", quantization="Q4_K_M") +model.push("user/my-model") ``` --- @@ -195,7 +204,10 @@ model.export("safetensors", "./model-hf/") ```python from quantllm import turbo -model = turbo("meta-llama/Llama-3.2-3B") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Simple generation response = model.generate( @@ -267,8 +279,6 @@ model = turbo("meta-llama/Llama-3.2-3B") # Push with auto-generated model card model.push( "your-username/my-model", - format="gguf", - quantization="Q4_K_M", license="apache-2.0" ) ``` diff --git a/docs/api/gguf.md b/docs/api/gguf.md index 1135c5e..12f18dc 100644 --- a/docs/api/gguf.md +++ b/docs/api/gguf.md @@ -10,8 +10,11 @@ Export models to GGUF format for llama.cpp, Ollama, and LM Studio. 
from quantllm import turbo, convert_to_gguf, quantize_gguf # Method 1: Via TurboModel -model = turbo("meta-llama/Llama-3.2-3B") -model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) +model.export("gguf", "model.Q4_K_M.gguf") # Method 2: Direct conversion convert_to_gguf("meta-llama/Llama-3.2-3B", "model.Q4_K_M.gguf", quant_type="Q4_K_M") diff --git a/docs/api/hub.md b/docs/api/hub.md index 0fb1fa6..d501e56 100644 --- a/docs/api/hub.md +++ b/docs/api/hub.md @@ -10,8 +10,11 @@ Push models to HuggingFace Hub with auto-generated model cards. from quantllm import turbo, QuantLLMHubManager # Method 1: TurboModel.push() (Recommended) -model = turbo("meta-llama/Llama-3.2-3B") -model.push("user/my-model", format="gguf", quantization="Q4_K_M") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) +model.push("user/my-model") # Method 2: QuantLLMHubManager (Advanced) manager = QuantLLMHubManager("user/my-model", hf_token="hf_...") @@ -30,7 +33,7 @@ def push( self, repo_id: str, token: Optional[str] = None, - format: str = "safetensors", + format: Optional[str] = None, quantization: Optional[str] = None, license: str = "apache-2.0", commit_message: str = "Upload model via QuantLLM", @@ -44,7 +47,7 @@ def push( |-----------|------|---------|-------------| | `repo_id` | str | required | HuggingFace repo ID (user/model) | | `token` | str | None | HF token (or use HF_TOKEN env) | -| `format` | str | "safetensors" | Export format | +| `format` | str | None | Export format (uses `config["push_format"]` when omitted) | | `quantization` | str | None | Quantization type | | `license` | str | "apache-2.0" | License type | @@ -62,13 +65,14 @@ def push( ```python from quantllm import turbo -model = turbo("meta-llama/Llama-3.2-3B") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Push as GGUF model.push( - "your-username/llama-3.2-3b-gguf", - format="gguf", - quantization="Q4_K_M" + "your-username/llama-3.2-3b-gguf" ) # Push as ONNX diff --git a/docs/api/turbo.md b/docs/api/turbo.md index 512f8f0..ef4f0ed 100644 --- a/docs/api/turbo.md +++ b/docs/api/turbo.md @@ -14,6 +14,7 @@ def turbo( max_length: Optional[int] = None, device: Optional[str] = None, dtype: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, quantize: bool = True, trust_remote_code: bool = False, verbose: bool = True, @@ -32,6 +33,7 @@ def turbo( | `max_length` | int | auto | Maximum context length | | `device` | str | auto | Device ("cuda", "cpu", "cuda:0", "auto") | | `dtype` | str | auto | Data type ("float16", "bfloat16") | +| `config` | dict | None | Shared export/push defaults (`format`, `quantization`, `push_format`, `push_quantization`) | | `quantize` | bool | True | Whether to apply quantization | | `trust_remote_code` | bool | False | Trust remote code in model | | `verbose` | bool | True | Show loading progress and stats | @@ -124,7 +126,7 @@ When `verbose=True` (default), you'll see: ``` ╔════════════════════════════════════════════════════════════╗ -║ 🚀 QuantLLM v2.0.0 ║ +║ 🚀 QuantLLM v2.1.0rc1 ║ ╚════════════════════════════════════════════════════════════╝ 📊 Loading: meta-llama/Llama-3.2-3B diff --git a/docs/api/turbomodel.md b/docs/api/turbomodel.md index 3542463..d091958 100644 --- a/docs/api/turbomodel.md +++ b/docs/api/turbomodel.md 
@@ -232,8 +232,8 @@ Export the model to various formats. ```python def export( self, - format: str, - output_path: str, + format: Optional[str] = None, + output_path: Optional[str] = None, quantization: Optional[str] = None, **kwargs ) -> str @@ -241,14 +241,18 @@ def export( | Parameter | Type | Description | |-----------|------|-------------| -| `format` | str | "gguf", "onnx", "mlx", "safetensors" | -| `output_path` | str | Output file or directory | +| `format` | str | "gguf", "onnx", "mlx", "safetensors" (optional, uses shared config) | +| `output_path` | str | Output file or directory (optional) | | `quantization` | str | Quantization type (format-specific) | **Examples:** ```python # GGUF -model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) +model.export() # ONNX model.export("onnx", "./model-onnx/") @@ -269,7 +273,7 @@ def push( self, repo_id: str, token: Optional[str] = None, - format: str = "safetensors", + format: Optional[str] = None, quantization: Optional[str] = None, license: str = "apache-2.0", commit_message: str = "Upload model via QuantLLM", @@ -281,9 +285,7 @@ def push( ```python # Push as GGUF model.push( - "your-username/my-model", - format="gguf", - quantization="Q4_K_M" + "your-username/my-model" ) # Push as MLX diff --git a/docs/conf.py b/docs/conf.py index dacb626..ebd8a42 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,7 +3,7 @@ project = 'QuantLLM' copyright = '2024, Dark Coder' author = 'Dark Coder' -release = '2.0.0' +release = '2.1.0rc1' # Extensions extensions = [ @@ -21,7 +21,7 @@ # HTML output html_theme = 'sphinx_rtd_theme' html_static_path = ['_static'] -html_title = 'QuantLLM v2.0' +html_title = 'QuantLLM v2.1' html_logo = 'images/logo.png' html_favicon = 'images/favicon.ico' diff --git a/docs/guide/finetuning.md b/docs/guide/finetuning.md index 654debf..3dd29e4 100644 --- a/docs/guide/finetuning.md +++ b/docs/guide/finetuning.md @@ -193,13 +193,13 @@ print("Fine-tuned:", model.generate("prompt")) ```python # Export to GGUF -model.export("gguf", "finetuned.Q4_K_M.gguf", quantization="Q4_K_M") +model.export("gguf", "finetuned.Q4_K_M.gguf") # Export to SafeTensors model.export("safetensors", "./finetuned-model/") # Push to HuggingFace -model.push("your-username/finetuned-model", format="gguf") +model.push("your-username/finetuned-model") ``` ### Save and Load diff --git a/docs/guide/gguf-export.md b/docs/guide/gguf-export.md index 0c61f8d..35329d5 100644 --- a/docs/guide/gguf-export.md +++ b/docs/guide/gguf-export.md @@ -130,10 +130,12 @@ print(output["choices"][0]["text"]) Export and push in one step: ```python +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) model.push( "your-username/my-model-gguf", - format="gguf", - quantization="Q4_K_M", license="apache-2.0" ) ``` diff --git a/docs/guide/hub-integration.md b/docs/guide/hub-integration.md index 176976a..c0887c6 100644 --- a/docs/guide/hub-integration.md +++ b/docs/guide/hub-integration.md @@ -11,14 +11,15 @@ The easiest way to share your model: ```python from quantllm import turbo -model = turbo("meta-llama/Llama-3.2-3B") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Push with auto-generated model card model.push( "your-username/my-model", - token="hf_...", - format="gguf", - quantization="Q4_K_M" + 
token="hf_..." ) ``` @@ -49,34 +50,18 @@ model.push("user/repo", token="hf_...") ```python from quantllm import turbo -model = turbo("meta-llama/Llama-3.2-3B") - -# Push as GGUF (for Ollama, llama.cpp, LM Studio) -model.push( - "your-username/my-model-gguf", - format="gguf", - quantization="Q4_K_M", - license="apache-2.0" +model = turbo( + "meta-llama/Llama-3.2-3B", + config={ + "format": "gguf", + "quantization": "Q4_K_M", + "push_format": "gguf", + }, ) -# Push as ONNX -model.push( - "your-username/my-model-onnx", - format="onnx" -) - -# Push as MLX (Apple Silicon) -model.push( - "your-username/my-model-mlx", - format="mlx", - quantization="4bit" -) - -# Push as SafeTensors (default) -model.push( - "your-username/my-model", - format="safetensors" -) +# Uses shared config defaults +model.export() +model.push("your-username/my-model-gguf", license="apache-2.0") ``` ### Method 2: QuantLLMHubManager (Advanced) diff --git a/docs/index.md b/docs/index.md index 7fc9f0c..63a78d3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,24 +8,27 @@ --- -## Welcome to QuantLLM v2.0 +## Welcome to QuantLLM v2.1 (pre-release) QuantLLM makes working with large language models simple. Load any model, quantize it automatically, fine-tune with your data, and export to any format — all with just a few lines of code. ```python from quantllm import turbo -# Load with automatic 4-bit quantization -model = turbo("meta-llama/Llama-3.2-3B") +# Load with shared export/push defaults +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) # Generate text print(model.generate("Explain quantum computing")) # Export to GGUF for Ollama/llama.cpp -model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") +model.export() # Push to HuggingFace with auto-generated model card -model.push("username/my-model", format="gguf", quantization="Q4_K_M") +model.push("username/my-model") ``` --- @@ -89,7 +92,11 @@ model = turbo("microsoft/phi-3-mini") ### Export to Any Format ```python -model.export("gguf", "model.gguf", quantization="Q4_K_M") +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) +model.export() model.export("onnx", "./model-onnx/") model.export("mlx", "./model-mlx/", quantization="4bit") ``` @@ -101,7 +108,7 @@ model.finetune("training_data.json", epochs=3) ### Push to HuggingFace ```python -model.push("username/my-model", format="gguf") +model.push("username/my-model") ``` --- diff --git a/docs/installation.md b/docs/installation.md index 15f4c7c..478ee94 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -90,7 +90,7 @@ Expected output: ``` ╔════════════════════════════════════════════════════════════╗ ║ ║ -║ 🚀 QuantLLM v2.0.0 ║ +║ 🚀 QuantLLM v2.1.0rc1 ║ ║ Ultra-fast LLM Quantization & Export ║ ║ ║ ║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║ diff --git a/docs/quickstart.md b/docs/quickstart.md index 7382236..8050787 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -128,10 +128,12 @@ Share your model with the world: ```python # Push with auto-generated model card +model = turbo( + "meta-llama/Llama-3.2-3B", + config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, +) model.push( "your-username/my-awesome-model", - format="gguf", - quantization="Q4_K_M", license="apache-2.0" ) ``` @@ -196,7 +198,7 @@ quantllm.show_banner() ``` ╔════════════════════════════════════════════════════════════╗ ║ ║ -║ 🚀 QuantLLM v2.0.0 ║ +║ 🚀 QuantLLM 
v2.1.0rc1 ║ ║ Ultra-fast LLM Quantization & Export ║ ║ ║ ║ ✓ GGUF ✓ ONNX ✓ MLX ✓ SafeTensors ║ diff --git a/examples/01_quickstart.py b/examples/01_quickstart.py index 563a6df..a40e128 100644 --- a/examples/01_quickstart.py +++ b/examples/01_quickstart.py @@ -1,5 +1,5 @@ """ -QuantLLM v2.0 - Quick Start Example +QuantLLM v2.1 - Quick Start Example The simplest way to use QuantLLM. """ diff --git a/examples/02_gguf_export.py b/examples/02_gguf_export.py index 0efa2a7..37fee24 100644 --- a/examples/02_gguf_export.py +++ b/examples/02_gguf_export.py @@ -1,5 +1,5 @@ """ -QuantLLM v2.0 - GGUF Export Example +QuantLLM v2.1 - GGUF Export Example Export models to GGUF format for use with llama.cpp, Ollama, LM Studio. No external dependencies required! diff --git a/examples/03_finetuning.py b/examples/03_finetuning.py index c2021bd..f254a7f 100644 --- a/examples/03_finetuning.py +++ b/examples/03_finetuning.py @@ -1,5 +1,5 @@ """ -QuantLLM v2.0 - Fine-tuning Example +QuantLLM v2.1 - Fine-tuning Example Fine-tune a quantized model using LoRA. """ diff --git a/examples/04_hub_push.py b/examples/04_hub_push.py index af1ef3b..6a438c9 100644 --- a/examples/04_hub_push.py +++ b/examples/04_hub_push.py @@ -1,5 +1,5 @@ """ -QuantLLM v2.0 - Push to HuggingFace Hub +QuantLLM v2.1 - Push to HuggingFace Hub Push your models to HuggingFace Hub. """ diff --git a/examples/README.md b/examples/README.md index b6a0e68..810f7d7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,4 @@ -# QuantLLM v2.0 Examples +# QuantLLM v2.1 Examples Simple examples for the new TurboModel API. diff --git a/quantllm/__init__.py b/quantllm/__init__.py index 5c81eb6..6f2933b 100644 --- a/quantllm/__init__.py +++ b/quantllm/__init__.py @@ -1,5 +1,5 @@ """ -QuantLLM v2.0 - Ultra-fast LLM Quantization & GGUF Export +QuantLLM v2.1 - Ultra-fast LLM Quantization & GGUF Export The simplest way to load, quantize, fine-tune, and export LLMs. @@ -13,16 +13,19 @@ >>> from quantllm import turbo >>> >>> # Load any model (auto-quantizes to 4-bit) - >>> model = turbo("meta-llama/Llama-3.2-3B") + >>> model = turbo( + ... "meta-llama/Llama-3.2-3B", + ... config={"format": "gguf", "quantization": "Q4_K_M", "push_format": "gguf"}, + ... 
) >>> >>> # Generate text >>> model.generate("Hello, world!") >>> >>> # Export to GGUF with Q4_K_M quantization - >>> model.export("gguf", "model.Q4_K_M.gguf", quantization="Q4_K_M") + >>> model.export() >>> >>> # Push to HuggingFace Hub - >>> model.push("username/my-model", format="gguf", quantization="Q4_K_M") + >>> model.push("username/my-model") """ import os @@ -73,7 +76,7 @@ # Configure logging (minimal by default) configure_logging("WARNING") -__version__ = "2.0.0" +__version__ = "2.1.0rc1" __title__ = "QuantLLM" __description__ = "Ultra-fast LLM Quantization & Export (GGUF, ONNX, MLX)" __author__ = "Dark Coder" diff --git a/quantllm/core/export.py b/quantllm/core/export.py index 40f517a..05dcbb5 100644 --- a/quantllm/core/export.py +++ b/quantllm/core/export.py @@ -1,5 +1,5 @@ """ -Universal Export Module for QuantLLM v2.0 +Universal Export Module for QuantLLM v2.1 Provides unified export functionality to multiple formats: - GGUF (llama.cpp, Ollama, LM Studio) diff --git a/quantllm/core/memory.py b/quantllm/core/memory.py index bed8196..43298b8 100644 --- a/quantllm/core/memory.py +++ b/quantllm/core/memory.py @@ -1,5 +1,5 @@ """ -Memory Optimization Utilities for QuantLLM v2.0 +Memory Optimization Utilities for QuantLLM v2.1 Advanced memory management for training and inference of large models on limited GPU memory. diff --git a/quantllm/core/training.py b/quantllm/core/training.py index 053ec99..7ab9932 100644 --- a/quantllm/core/training.py +++ b/quantllm/core/training.py @@ -1,5 +1,5 @@ """ -Advanced Training Utilities for QuantLLM v2.0 +Advanced Training Utilities for QuantLLM v2.1 Provides auto-configuration and optimization for fine-tuning with minimal user input. diff --git a/quantllm/core/turbo_model.py b/quantllm/core/turbo_model.py index c04de1d..53ec668 100644 --- a/quantllm/core/turbo_model.py +++ b/quantllm/core/turbo_model.py @@ -26,6 +26,12 @@ from .memory import memory_optimized_tensor_order DEFAULT_CHUNKED_SHARD_SIZE = "2GB" +DEFAULT_EXPORT_PUSH_CONFIG = { + "format": "safetensors", + "push_format": "safetensors", + "quantization": "Q4_K_M", + "push_quantization": None, +} class TurboModel: @@ -57,6 +63,7 @@ def __init__( model: PreTrainedModel, tokenizer: PreTrainedTokenizer, config: SmartConfig, + export_push_config: Optional[Dict[str, Any]] = None, verbose: bool = False, ): """ @@ -73,9 +80,7 @@ def __init__( self._is_quantized = False self._is_finetuned = False self._lora_applied = False - self._is_quantized = False - self._is_finetuned = False - self._lora_applied = False + self.export_push_config = self._build_export_push_config(export_push_config) self.verbose = verbose @classmethod @@ -92,6 +97,7 @@ def from_pretrained( trust_remote_code: bool = True, quantize: bool = True, config_override: Optional[Dict[str, Any]] = None, + config: Optional[Dict[str, Any]] = None, verbose: bool = True, ) -> "TurboModel": """ @@ -112,8 +118,7 @@ def from_pretrained( trust_remote_code: Trust remote code in model quantize: Whether to quantize the model config_override: Dict to override any auto-detected settings - quantize: Whether to quantize the model - config_override: Dict to override any auto-detected settings + config: Shared export/push config (format, quantization, push_format, etc.) 
verbose: Print loading progress Returns: @@ -268,7 +273,7 @@ def from_pretrained( print_success("Model loaded successfully!") logger.info("") - instance = cls(model, tokenizer, smart_config) + instance = cls(model, tokenizer, smart_config, export_push_config=config) instance._is_quantized = quantize and smart_config.bits < 16 return instance @@ -494,6 +499,30 @@ def _get_quantization_kwargs(config: SmartConfig) -> Dict[str, Any]: except ImportError: logger.warning("⚠ bitsandbytes not installed, loading without quantization") return {} + + @staticmethod + def _build_export_push_config(config: Optional[Dict[str, Any]]) -> Dict[str, Any]: + """Build shared export/push config with deterministic defaults.""" + resolved = dict(DEFAULT_EXPORT_PUSH_CONFIG) + if config: + aliases = { + "export_format": "format", + "export_quantization": "quantization", + } + nullable_overrides = {"push_quantization"} + for key, value in config.items(): + mapped_key = aliases.get(key, key) + if mapped_key in resolved and ( + value is not None or mapped_key in nullable_overrides + ): + resolved[mapped_key] = value + + if "format" in config and "push_format" not in config: + resolved["push_format"] = resolved["format"] + if "quantization" in config and "push_quantization" not in config: + resolved["push_quantization"] = resolved["quantization"] + + return resolved @staticmethod def _enable_flash_attention(model: PreTrainedModel, verbose: bool = True) -> None: @@ -945,7 +974,7 @@ def tokenize_function(examples): def export( self, - format: str, + format: Optional[str] = None, output_path: Optional[str] = None, *, quantization: Optional[str] = None, @@ -961,7 +990,7 @@ def export( - "mlx": For Apple Silicon Macs Args: - format: Target format (gguf, safetensors, onnx, mlx) + format: Target format (gguf, safetensors, onnx, mlx). Uses shared config when omitted. output_path: Output file/directory path quantization: Format-specific quantization: - GGUF: Q4_K_M, Q5_K_M, Q8_0, etc. @@ -978,7 +1007,16 @@ def export( >>> model.export("onnx", "./my_model_onnx/") >>> model.export("mlx", "./my_model_mlx/", quantization="4bit") """ - format = format.lower() + format = ( + format + if format is not None + else self.export_push_config.get("format", DEFAULT_EXPORT_PUSH_CONFIG["format"]) + ).lower() + effective_quantization = quantization + if effective_quantization is None and format == "gguf": + effective_quantization = self.export_push_config.get( + "quantization", DEFAULT_EXPORT_PUSH_CONFIG["quantization"] + ) # Merge LoRA if applied if self._lora_applied: @@ -991,7 +1029,7 @@ def export( if output_path is None: model_name = self.model.config._name_or_path.split('/')[-1] if format == "gguf": - quant = quantization or self.config.quant_type or "q4_k_m" + quant = effective_quantization output_path = f"{model_name}.{quant.upper()}.gguf" elif format == "safetensors": output_path = f"./{model_name}-quantllm/" @@ -1012,7 +1050,7 @@ def export( raise ValueError(f"Unknown format: {format}. 
Supported: {list(exporters.keys())}") print_header(f"Exporting to {format.upper()}") - result = exporters[format](output_path, quantization=quantization, **kwargs) + result = exporters[format](output_path, quantization=effective_quantization, **kwargs) print_success(f"Exported to: {result}") return result @@ -1021,7 +1059,7 @@ def push_to_hub( self, repo_id: str, token: Optional[str] = None, - format: str = "safetensors", + format: Optional[str] = None, quantization: Optional[str] = None, commit_message: str = "Upload model via QuantLLM", license: str = "apache-2.0", @@ -1052,7 +1090,14 @@ def push_to_hub( """ from ..hub import QuantLLMHubManager - format_lower = format.lower() + format_lower = ( + format + if format is not None + else self.export_push_config.get("push_format", DEFAULT_EXPORT_PUSH_CONFIG["push_format"]) + ).lower() + push_quantization = quantization or self.export_push_config.get( + "push_quantization", DEFAULT_EXPORT_PUSH_CONFIG["push_quantization"] + ) # Get the original base model name (full path for HuggingFace link) base_model_full = self.model.config._name_or_path @@ -1066,7 +1111,9 @@ def push_to_hub( if format_lower == "gguf": # Export GGUF directly to staging - quant_label = quantization or (self.config.quant_type if self.config.quant_type != "GGUF" else "q4_k_m") or "q4_k_m" + quant_label = push_quantization or self.export_push_config.get( + "quantization", DEFAULT_EXPORT_PUSH_CONFIG["quantization"] + ) filename = f"{model_name}.{quant_label.upper()}.gguf" save_path = os.path.join(manager.staging_dir, filename) @@ -1085,11 +1132,11 @@ def push_to_hub( print_info("Exporting to ONNX format...") save_path = manager.staging_dir - self._export_onnx(save_path, quantization=quantization, **kwargs) + self._export_onnx(save_path, quantization=push_quantization, **kwargs) manager.track_hyperparameters({ "format": "onnx", - "quantization": quantization, + "quantization": push_quantization, "base_model": base_model_full, "license": license, }) @@ -1100,11 +1147,11 @@ def push_to_hub( print_info("Exporting to MLX format...") save_path = manager.staging_dir - self._export_mlx(save_path, quantization=quantization, **kwargs) + self._export_mlx(save_path, quantization=push_quantization, **kwargs) manager.track_hyperparameters({ "format": "mlx", - "quantization": quantization, + "quantization": push_quantization, "base_model": base_model_full, "license": license, }) @@ -1117,7 +1164,7 @@ def push_to_hub( "base_model": base_model_full, "license": license, }) - manager.save_final_model(self, format=format) + manager.save_final_model(self, format=format_lower) manager._generate_model_card(format=format_lower) manager.push(commit_message=commit_message) @@ -1852,6 +1899,7 @@ def turbo( max_length: Optional[int] = None, device: Optional[str] = None, dtype: Optional[str] = None, + config: Optional[Dict[str, Any]] = None, **kwargs, ) -> TurboModel: """ @@ -1866,6 +1914,7 @@ def turbo( max_length: Override max sequence length (default: auto) device: Override device (default: best GPU) dtype: Override dtype (default: bf16/fp16) + config: Shared export/push config (format, quantization, push_format, etc.) 
**kwargs: Additional options passed to from_pretrained Returns: @@ -1896,5 +1945,6 @@ def turbo( max_length=max_length, device=device, dtype=dtype, + config=config, **kwargs, ) diff --git a/quantllm/hub/model_card.py b/quantllm/hub/model_card.py index 33a887f..66d8513 100644 --- a/quantllm/hub/model_card.py +++ b/quantllm/hub/model_card.py @@ -427,7 +427,7 @@ def _generate_details_section(self) -> str: | **Quantization** | {self.quantization or "Full Precision"} | | **License** | `{self.license}` | | **Export Date** | {datetime.now().strftime("%Y-%m-%d")} | -| **Exported By** | [QuantLLM v2.0](https://github.com/codewithdark-git/QuantLLM) | +| **Exported By** | [QuantLLM v2.1](https://github.com/codewithdark-git/QuantLLM) | ''' def _generate_quantization_section(self) -> str: diff --git a/requirements.txt b/requirements.txt index 4a67ebe..ef5e1c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -# QuantLLM v2.0 Requirements +# QuantLLM v2.1 (pre-release) Requirements # Core dependencies torch>=2.0.0 diff --git a/setup.py b/setup.py index c858e64..eb2ebc8 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name="quantllm", - version="2.0.0", + version="2.1.0rc1", author="Dark Coder", author_email="codewithdark90@gmail.com", description="Ultra-fast LLM quantization, fine-tuning, and deployment with one line of code", @@ -117,4 +117,4 @@ }, include_package_data=True, zip_safe=False, -) \ No newline at end of file +) diff --git a/tests/test_export_push_config.py b/tests/test_export_push_config.py new file mode 100644 index 0000000..dba32bf --- /dev/null +++ b/tests/test_export_push_config.py @@ -0,0 +1,164 @@ +from types import SimpleNamespace + +from quantllm.core.turbo_model import TurboModel + + +def _stub_model(name: str = "org/test-model"): + return SimpleNamespace(config=SimpleNamespace(_name_or_path=name)) + + +def _stub_turbo(export_push_config): + model = TurboModel.__new__(TurboModel) + model.model = _stub_model() + model.tokenizer = None + smart_config = SimpleNamespace(quant_type="Q8_0") + model.config = smart_config + model._lora_applied = False + model.verbose = False + model.export_push_config = export_push_config + return model + + +def test_build_export_push_config_uses_deterministic_defaults(): + resolved = TurboModel._build_export_push_config(None) + assert resolved["format"] == "safetensors" + assert resolved["push_format"] == "safetensors" + assert resolved["quantization"] == "Q4_K_M" + assert resolved["push_quantization"] is None + + +def test_build_export_push_config_aligns_push_values_with_export_values(): + resolved = TurboModel._build_export_push_config( + {"format": "gguf", "quantization": "Q5_K_M"} + ) + assert resolved["format"] == "gguf" + assert resolved["push_format"] == "gguf" + assert resolved["quantization"] == "Q5_K_M" + assert resolved["push_quantization"] == "Q5_K_M" + + +def test_build_export_push_config_allows_nullable_push_quantization_override(): + resolved = TurboModel._build_export_push_config( + {"format": "gguf", "quantization": "Q5_K_M", "push_quantization": None} + ) + assert resolved["quantization"] == "Q5_K_M" + assert resolved["push_quantization"] is None + + +def test_export_prefers_shared_quantization_over_smart_config_quant_type(): + model = _stub_turbo( + { + "format": "gguf", + "push_format": "gguf", + "quantization": "Q4_K_M", + "push_quantization": "Q4_K_M", + } + ) + + captured = {} + + def fake_export_gguf(output_path, quantization=None, **kwargs): + captured["output_path"] = output_path + 
captured["quantization"] = quantization + return output_path + + model._export_gguf = fake_export_gguf + model._export_safetensors = lambda *args, **kwargs: "" + model._export_onnx = lambda *args, **kwargs: "" + model._export_mlx = lambda *args, **kwargs: "" + + output = model.export() + + assert model.config.quant_type == "Q8_0" + assert output.endswith(".Q4_K_M.gguf") + assert captured["quantization"] == "Q4_K_M" + + +def test_gguf_push_uses_shared_config_when_omitted(monkeypatch, tmp_path): + model = _stub_turbo({ + "format": "gguf", + "push_format": "gguf", + "quantization": "Q4_K_M", + "push_quantization": "Q4_K_M", + }) + + calls = {} + + def fake_export(*, format, output_path, quantization=None, **kwargs): + calls["export"] = { + "format": format, + "output_path": output_path, + "quantization": quantization, + } + return output_path + + model.export = fake_export + + class FakeManager: + def __init__(self, repo_id, hf_token=None): + self.staging_dir = str(tmp_path / "quantllm-test-staging") + + def track_hyperparameters(self, params): + calls["tracked"] = params + + def _generate_model_card(self, format): + calls["card_format"] = format + + def push(self, commit_message): + calls["pushed"] = commit_message + + def save_final_model(self, *args, **kwargs): + raise AssertionError( + "save_final_model should not be called for GGUF push" + ) + + import quantllm.hub as hub_module + + monkeypatch.setattr(hub_module, "QuantLLMHubManager", FakeManager) + + model.push("user/repo") + + assert calls["export"]["format"] == "gguf" + assert calls["export"]["quantization"] == "Q4_K_M" + assert calls["tracked"]["quantization"] == "Q4_K_M" + + +def test_onnx_push_does_not_force_quantization(monkeypatch, tmp_path): + model = _stub_turbo( + TurboModel._build_export_push_config({"push_format": "onnx"}) + ) + + calls = {} + + class FakeManager: + def __init__(self, repo_id, hf_token=None): + self.staging_dir = str(tmp_path / "quantllm-test-staging") + + def track_hyperparameters(self, params): + calls["tracked"] = params + + def _generate_model_card(self, format): + calls["card_format"] = format + + def push(self, commit_message): + calls["pushed"] = commit_message + + def save_final_model(self, *args, **kwargs): + raise AssertionError( + "save_final_model should not be called for ONNX push" + ) + + def fake_export_onnx(output_path, quantization=None, **kwargs): + calls["onnx_quantization"] = quantization + return output_path + + model._export_onnx = fake_export_onnx + + import quantllm.hub as hub_module + + monkeypatch.setattr(hub_module, "QuantLLMHubManager", FakeManager) + + model.push("user/repo") + + assert calls["onnx_quantization"] is None + assert calls["tracked"]["quantization"] is None