From 13713d4a4bb043222d6f19e05ddf16c8d71076cb Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 21 Aug 2025 13:42:00 +0000 Subject: [PATCH 01/10] reduces cli args redundancy --- src/lighteval/cli_args.py | 148 +++++++++++++ src/lighteval/main_accelerate.py | 122 ++++------- src/lighteval/main_baseline.py | 36 ++-- src/lighteval/main_custom.py | 98 +++------ src/lighteval/main_endpoint.py | 360 +++++++++---------------------- src/lighteval/main_nanotron.py | 26 +-- src/lighteval/main_sglang.py | 121 ++++------- src/lighteval/main_vllm.py | 121 ++++------- 8 files changed, 428 insertions(+), 604 deletions(-) create mode 100644 src/lighteval/cli_args.py diff --git a/src/lighteval/cli_args.py b/src/lighteval/cli_args.py new file mode 100644 index 000000000..70697b906 --- /dev/null +++ b/src/lighteval/cli_args.py @@ -0,0 +1,148 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +Common CLI argument types for LightEval main files. +This module exports pre-defined argument types to reduce redundancy across main_*.py files. +""" + +from typing import Optional + +from typer import Argument, Option +from typing_extensions import Annotated + + +# Help panel names for consistent organization +HELP_PANEL_NAME_1 = "Common Parameters" +HELP_PANEL_NAME_2 = "Logging Parameters" +HELP_PANEL_NAME_3 = "Debug Parameters" +HELP_PANEL_NAME_4 = "Modeling Parameters" + + +# Common Parameters (HELP_PANEL_NAME_1) +DatasetLoadingProcesses = Annotated[ + int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) +] + +CustomTasks = Annotated[ + Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) +] + +NumFewshotSeeds = Annotated[ + int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) +] + +LoadResponsesFromDetailsDateId = Annotated[ + Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) +] + +RemoveReasoningTags = Annotated[ + bool | None, + Option( + help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", + rich_help_panel=HELP_PANEL_NAME_1, + ), +] + +ReasoningTags = Annotated[ + str | None, + Option( + help="List of reasoning tags (provided as pairs) to remove from responses. 
Default is [('', '')].", + rich_help_panel=HELP_PANEL_NAME_1, + ), +] + + +# Logging Parameters (HELP_PANEL_NAME_2) +OutputDir = Annotated[str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)] + +ResultsPathTemplate = Annotated[ + str | None, + Option( + help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`", + rich_help_panel=HELP_PANEL_NAME_2, + ), +] + +PushToHub = Annotated[bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)] + +PushToTensorboard = Annotated[bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)] + +PublicRun = Annotated[ + bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) +] + +ResultsOrg = Annotated[ + Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) +] + +SaveDetails = Annotated[ + bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) +] + +Wandb = Annotated[ + bool, + Option( + help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", + rich_help_panel=HELP_PANEL_NAME_2, + ), +] + + +# Debug Parameters (HELP_PANEL_NAME_3) +MaxSamples = Annotated[ + Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) +] + +JobId = Annotated[int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)] + + +# Common argument patterns +Tasks = Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")] + +ModelArgs = Annotated[ + str, + Argument( + help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" + ), +] + + +# Default values for common arguments +DEFAULT_VALUES = { + "dataset_loading_processes": 1, + "custom_tasks": None, + "num_fewshot_seeds": 1, + "load_responses_from_details_date_id": None, + "remove_reasoning_tags": True, + "reasoning_tags": None, + "output_dir": "results", + "results_path_template": None, + "push_to_hub": False, + "push_to_tensorboard": False, + "public_run": False, + "results_org": None, + "save_details": False, + "wandb": False, + "max_samples": None, + "job_id": 0, +} diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 1b3a3c6c8..ab9d74aa5 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -21,99 +21,65 @@ # SOFTWARE. 
import logging -from typing import Optional -from typer import Argument, Option +from typer import Option from typing_extensions import Annotated +from lighteval.cli_args import ( + DEFAULT_VALUES, + HELP_PANEL_NAME_4, + CustomTasks, + DatasetLoadingProcesses, + JobId, + LoadResponsesFromDetailsDateId, + MaxSamples, + ModelArgs, + NumFewshotSeeds, + OutputDir, + PublicRun, + PushToHub, + PushToTensorboard, + ReasoningTags, + RemoveReasoningTags, + ResultsOrg, + ResultsPathTemplate, + SaveDetails, + Tasks, + Wandb, +) -logger = logging.getLogger(__name__) -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +logger = logging.getLogger(__name__) def accelerate( # noqa C901 # === general === - model_args: Annotated[ - str, - Argument( - help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" - ), - ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + model_args: ModelArgs, + tasks: Tasks, # === Common parameters === vision_model: Annotated[ bool, Option(help="Use vision model for evaluation.", rich_help_panel=HELP_PANEL_NAME_4) ] = False, - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ + "load_responses_from_details_date_id" + ], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. 
for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using accelerate and transformers as backend. diff --git a/src/lighteval/main_baseline.py b/src/lighteval/main_baseline.py index b4195d116..035cad276 100644 --- a/src/lighteval/main_baseline.py +++ b/src/lighteval/main_baseline.py @@ -21,32 +21,22 @@ # SOFTWARE. 
-from typing import Optional - -from typer import Argument, Option -from typing_extensions import Annotated - - -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +from lighteval.cli_args import ( + DEFAULT_VALUES, + CustomTasks, + DatasetLoadingProcesses, + MaxSamples, + OutputDir, + Tasks, +) def baseline( - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, + tasks: Tasks, + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], ): """ Compute baselines for given tasks. diff --git a/src/lighteval/main_custom.py b/src/lighteval/main_custom.py index 6883e3667..d2152b585 100644 --- a/src/lighteval/main_custom.py +++ b/src/lighteval/main_custom.py @@ -19,87 +19,59 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Optional + import typer -from typer import Argument, Option +from typer import Argument from typing_extensions import Annotated +from lighteval.cli_args import ( + DEFAULT_VALUES, + CustomTasks, + DatasetLoadingProcesses, + JobId, + MaxSamples, + NumFewshotSeeds, + OutputDir, + PublicRun, + PushToHub, + PushToTensorboard, + ReasoningTags, + RemoveReasoningTags, + ResultsOrg, + ResultsPathTemplate, + SaveDetails, + Tasks, +) from lighteval.models.custom.custom_model import CustomModelConfig app = typer.Typer() -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" - - @app.command(rich_help_panel="Evaluation Backends") def custom( # === general === model_name: Annotated[str, Argument(help="The model name to evaluate")], model_definition_file_path: Annotated[str, Argument(help="The model definition file path to evaluate")], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + tasks: Tasks, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. 
for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate custom models (can be anything). diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 7ce9aa996..08c9ee33c 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -19,20 +19,36 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from typing import Optional + import typer from typer import Argument, Option from typing_extensions import Annotated - -app = typer.Typer() +from lighteval.cli_args import ( + DEFAULT_VALUES, + HELP_PANEL_NAME_4, + CustomTasks, + DatasetLoadingProcesses, + JobId, + LoadResponsesFromDetailsDateId, + MaxSamples, + NumFewshotSeeds, + OutputDir, + PublicRun, + PushToHub, + PushToTensorboard, + ReasoningTags, + RemoveReasoningTags, + ResultsOrg, + ResultsPathTemplate, + SaveDetails, + Tasks, + Wandb, +) -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +app = typer.Typer() @app.command(rich_help_panel="Evaluation Backends") @@ -41,7 +57,7 @@ def inference_endpoint( model_config_path: Annotated[ str, Argument(help="Path to model config yaml file. 
(examples/model_configs/endpoint_model.yaml)") ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + tasks: Tasks, free_endpoint: Annotated[ bool, Option( @@ -50,72 +66,26 @@ def inference_endpoint( ), ] = False, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ + "load_responses_from_details_date_id" + ], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. 
see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using inference-endpoints as backend. @@ -177,74 +147,28 @@ def tgi( model_config_path: Annotated[ str, Argument(help="Path to model config yaml file. (examples/model_configs/tgi_model.yaml)") ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + tasks: Tasks, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ + "load_responses_from_details_date_id" + ], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. 
for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using TGI as backend. @@ -313,74 +237,28 @@ def litellm( help="config file path for the litellm model, or a comma separated string of model args (model_name={},base_url={},provider={})" ), ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + tasks: Tasks, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. 
Default is [('<think>', '</think>')].",
-            rich_help_panel=HELP_PANEL_NAME_1,
-        ),
-    ] = None,
+    dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"],
+    custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"],
+    num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"],
+    load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[
+        "load_responses_from_details_date_id"
+    ],
+    remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"],
+    reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"],
     # === saving ===
-    output_dir: Annotated[
-        str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = "results",
-    results_path_template: Annotated[
-        str | None,
-        Option(
-            help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`",
-            rich_help_panel=HELP_PANEL_NAME_2,
-        ),
-    ] = None,
-    push_to_hub: Annotated[
-        bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = False,
-    push_to_tensorboard: Annotated[
-        bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = False,
-    public_run: Annotated[
-        bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = False,
-    results_org: Annotated[
-        Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = None,
-    save_details: Annotated[
-        bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
-    ] = False,
-    wandb: Annotated[
-        bool,
-        Option(
-            help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio",
-            rich_help_panel=HELP_PANEL_NAME_2,
-        ),
-    ] = False,
+    output_dir: OutputDir = DEFAULT_VALUES["output_dir"],
+    results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"],
+    push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"],
+    push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"],
+    public_run: PublicRun = DEFAULT_VALUES["public_run"],
+    results_org: ResultsOrg = DEFAULT_VALUES["results_org"],
+    save_details: SaveDetails = DEFAULT_VALUES["save_details"],
+    wandb: Wandb = DEFAULT_VALUES["wandb"],
     # === debug ===
-    max_samples: Annotated[
-        Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
-    ] = None,
-    job_id: Annotated[
-        int, Option(help="Optional job id for future refenrence.", rich_help_panel=HELP_PANEL_NAME_3)
-    ] = 0,
+    max_samples: MaxSamples = DEFAULT_VALUES["max_samples"],
+    job_id: JobId = DEFAULT_VALUES["job_id"],
 ):
     """
     Evaluate models using LiteLLM as backend.
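
The hunks above and below apply the same mechanical rewrite to every backend
command: inline Annotated[...] declarations are replaced by the shared aliases
from lighteval.cli_args, and defaults are looked up in the shared DEFAULT_VALUES
dict. A minimal, self-contained sketch of the resulting pattern as of this
commit (the `demo` command and file name are illustrative only, not part of the
patch):

    import typer

    from lighteval.cli_args import DEFAULT_VALUES, CustomTasks, MaxSamples, OutputDir, Tasks

    app = typer.Typer()


    @app.command()
    def demo(
        tasks: Tasks,
        custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"],
        output_dir: OutputDir = DEFAULT_VALUES["output_dir"],
        max_samples: MaxSamples = DEFAULT_VALUES["max_samples"],
    ):
        """Shared aliases keep their help text and rich help panels."""
        print(tasks, custom_tasks, output_dir, max_samples)


    if __name__ == "__main__":
        app()

Saved as e.g. demo.py, `python demo.py --help` would group the options under
the same "Common Parameters", "Logging Parameters", and "Debug Parameters"
panels the real commands use, since the panel names travel with the aliases.
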
@@ -453,71 +331,25 @@ def inference_providers( help="config file path for the inference provider model, or a comma separated string of model args (model_name={},provider={},generation={temperature: 0.6})" ), ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + tasks: Tasks, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. 
Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using HuggingFace's inference providers as backend. diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 1ded89850..936220331 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -28,11 +28,11 @@ from typing_extensions import Annotated from yaml import SafeLoader - -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +from lighteval.cli_args import ( + DEFAULT_VALUES, + ReasoningTags, + RemoveReasoningTags, +) SEED = 1234 @@ -43,20 +43,8 @@ def nanotron( str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], ): """ Evaluate models using nanotron as backend. diff --git a/src/lighteval/main_sglang.py b/src/lighteval/main_sglang.py index c458bcc01..89867fd84 100644 --- a/src/lighteval/main_sglang.py +++ b/src/lighteval/main_sglang.py @@ -19,94 +19,55 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from typing import Optional -from typer import Argument, Option -from typing_extensions import Annotated - - -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +from lighteval.cli_args import ( + DEFAULT_VALUES, + CustomTasks, + DatasetLoadingProcesses, + JobId, + LoadResponsesFromDetailsDateId, + MaxSamples, + ModelArgs, + NumFewshotSeeds, + OutputDir, + PublicRun, + PushToHub, + PushToTensorboard, + ReasoningTags, + RemoveReasoningTags, + ResultsOrg, + ResultsPathTemplate, + SaveDetails, + Tasks, + Wandb, +) def sglang( # === general === - model_args: Annotated[ - str, - Argument( - help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" - ), - ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + model_args: ModelArgs, + tasks: Tasks, # === Common parameters === - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ + "load_responses_from_details_date_id" + ], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. 
for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using sglang as backend. diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index fe243c317..7f381f19b 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -19,97 +19,64 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + from typing import Optional -from typer import Argument, Option +from typer import Option from typing_extensions import Annotated - -HELP_PANEL_NAME_1 = "Common Parameters" -HELP_PANEL_NAME_2 = "Logging Parameters" -HELP_PANEL_NAME_3 = "Debug Parameters" -HELP_PANEL_NAME_4 = "Modeling Parameters" +from lighteval.cli_args import ( + DEFAULT_VALUES, + HELP_PANEL_NAME_4, + CustomTasks, + DatasetLoadingProcesses, + JobId, + LoadResponsesFromDetailsDateId, + MaxSamples, + ModelArgs, + NumFewshotSeeds, + OutputDir, + PublicRun, + PushToHub, + PushToTensorboard, + ReasoningTags, + RemoveReasoningTags, + ResultsOrg, + ResultsPathTemplate, + SaveDetails, + Tasks, + Wandb, +) def vllm( # === general === - model_args: Annotated[ - str, - Argument( - help="Model arguments in the form key1=value1,key2=value2,... 
or path to yaml config file (see examples/model_configs/transformers_model.yaml)" - ), - ], - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + model_args: ModelArgs, + tasks: Tasks, # === Common parameters === cot_prompt: Annotated[ Optional[str], Option(help="Use chain of thought prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4) ] = None, - dataset_loading_processes: Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - custom_tasks: Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - num_fewshot_seeds: Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) - ] = 1, - load_responses_from_details_date_id: Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) - ] = None, - remove_reasoning_tags: Annotated[ - bool | None, - Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = True, - reasoning_tags: Annotated[ - str | None, - Option( - help="List of reasoning tags (provided as pairs) to remove from responses. Default is [('', '')].", - rich_help_panel=HELP_PANEL_NAME_1, - ), - ] = None, + dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], + custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], + num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ + "load_responses_from_details_date_id" + ], + remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], + reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], # === saving === - output_dir: Annotated[ - str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = "results", - results_path_template: Annotated[ - str | None, - Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = None, - push_to_hub: Annotated[ - bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - push_to_tensorboard: Annotated[ - bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - public_run: Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - results_org: Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) - ] = None, - save_details: Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) - ] = False, - wandb: Annotated[ - bool, - Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. 
see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", - rich_help_panel=HELP_PANEL_NAME_2, - ), - ] = False, + output_dir: OutputDir = DEFAULT_VALUES["output_dir"], + results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], + push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], + push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], + public_run: PublicRun = DEFAULT_VALUES["public_run"], + results_org: ResultsOrg = DEFAULT_VALUES["results_org"], + save_details: SaveDetails = DEFAULT_VALUES["save_details"], + wandb: Wandb = DEFAULT_VALUES["wandb"], # === debug === - max_samples: Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) - ] = None, - job_id: Annotated[ - int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3) - ] = 0, + max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + job_id: JobId = DEFAULT_VALUES["job_id"], ): """ Evaluate models using vllm as backend. From eeeb34a919af0b310cab8d72795587639c2714a7 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 21 Aug 2025 13:44:07 +0000 Subject: [PATCH 02/10] fix typing --- src/lighteval/cli_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/cli_args.py b/src/lighteval/cli_args.py index 70697b906..1db2da141 100644 --- a/src/lighteval/cli_args.py +++ b/src/lighteval/cli_args.py @@ -56,7 +56,7 @@ ] RemoveReasoningTags = Annotated[ - bool | None, + bool, Option( help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", rich_help_panel=HELP_PANEL_NAME_1, From f4bf926fbff2a72022e5d247675ee39ab82d4536 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 21 Aug 2025 13:54:08 +0000 Subject: [PATCH 03/10] reasoning tags do not need to default to None to then be attributed to actual default --- src/lighteval/cli_args.py | 12 +++++------- src/lighteval/pipeline.py | 4 +--- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/lighteval/cli_args.py b/src/lighteval/cli_args.py index 1db2da141..03bd0cc37 100644 --- a/src/lighteval/cli_args.py +++ b/src/lighteval/cli_args.py @@ -43,9 +43,7 @@ int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) ] -CustomTasks = Annotated[ - Optional[str], Option(help="Path to custom tasks directory.", rich_help_panel=HELP_PANEL_NAME_1) -] +CustomTasks = Annotated[Optional[str], Option(help="Path to custom tasks file.", rich_help_panel=HELP_PANEL_NAME_1)] NumFewshotSeeds = Annotated[ int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) @@ -58,15 +56,15 @@ RemoveReasoningTags = Annotated[ bool, Option( - help="Remove reasoning tags from responses (true to remove, false to leave - true by default).", + help="Remove reasoning tags from responses.", rich_help_panel=HELP_PANEL_NAME_1, ), ] ReasoningTags = Annotated[ - str | None, + str, Option( - help="List of reasoning tags (provided as pairs) to remove from responses. 
Default is [('', '')].", + help="List of reasoning tags (provided as pairs) to remove from responses.", rich_help_panel=HELP_PANEL_NAME_1, ), ] @@ -134,7 +132,7 @@ "num_fewshot_seeds": 1, "load_responses_from_details_date_id": None, "remove_reasoning_tags": True, - "reasoning_tags": None, + "reasoning_tags": "[('', '')]", "output_dir": "results", "results_path_template": None, "push_to_hub": False, diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index eb27de58a..0c89145d5 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -105,7 +105,7 @@ class PipelineParameters: max_samples: int | None = None cot_prompt: str | None = None remove_reasoning_tags: bool = True - reasoning_tags: str | list[tuple[str, str]] | None = None + reasoning_tags: str | list[tuple[str, str]] = [("", "")] load_responses_from_details_date_id: str | None = None bootstrap_iters: int = 1000 @@ -129,8 +129,6 @@ def __post_init__(self): # noqa C901 elif self.launcher_type == ParallelismManager.OPENAI: if not is_openai_available(): raise ImportError(NO_OPENAI_ERROR_MSG) - if self.reasoning_tags is None: - self.reasoning_tags = [("", "")] else: # Convert reasoning tags to list if needed if not isinstance(self.reasoning_tags, list): From 081bfa2273bd544e443b526db7edfed2cd8f5739 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 21 Aug 2025 14:23:06 +0000 Subject: [PATCH 04/10] fix typing for dataclass --- src/lighteval/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 0c89145d5..c13fda1a0 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -105,7 +105,7 @@ class PipelineParameters: max_samples: int | None = None cot_prompt: str | None = None remove_reasoning_tags: bool = True - reasoning_tags: str | list[tuple[str, str]] = [("", "")] + reasoning_tags: str | list[tuple[str, str]] = "[('', '')]" load_responses_from_details_date_id: str | None = None bootstrap_iters: int = 1000 From 7939c9fcb4027fb9b80ce059df235bcf2c42c1a0 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Thu, 21 Aug 2025 14:31:44 +0000 Subject: [PATCH 05/10] better docs for cli args --- src/lighteval/cli_args.py | 99 ++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/src/lighteval/cli_args.py b/src/lighteval/cli_args.py index 03bd0cc37..31aefbe0d 100644 --- a/src/lighteval/cli_args.py +++ b/src/lighteval/cli_args.py @@ -40,23 +40,41 @@ # Common Parameters (HELP_PANEL_NAME_1) DatasetLoadingProcesses = Annotated[ - int, Option(help="Number of processes to use for dataset loading.", rich_help_panel=HELP_PANEL_NAME_1) + int, + Option( + help="Number of parallel processes to use for loading datasets. Higher values can speed up dataset loading but use more memory.", + rich_help_panel=HELP_PANEL_NAME_1, + ), ] -CustomTasks = Annotated[Optional[str], Option(help="Path to custom tasks file.", rich_help_panel=HELP_PANEL_NAME_1)] +CustomTasks = Annotated[ + Optional[str], + Option( + help="Path to a Python file containing custom task definitions. The file should define a TASKS_TABLE with LightevalTaskConfig objects.", + rich_help_panel=HELP_PANEL_NAME_1, + ), +] NumFewshotSeeds = Annotated[ - int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1) + int, + Option( + help="Number of different random seeds to use for few-shot evaluation. 
Each seed will generate different few-shot examples, providing more robust evaluation.", + rich_help_panel=HELP_PANEL_NAME_1, + ), ] LoadResponsesFromDetailsDateId = Annotated[ - Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1) + Optional[str], + Option( + help="Load previously generated model responses from a specific evaluation run instead of running the model. Use the timestamp/date_id from a previous run's details directory.", + rich_help_panel=HELP_PANEL_NAME_1, + ), ] RemoveReasoningTags = Annotated[ bool, Option( - help="Remove reasoning tags from responses.", + help="Whether to remove reasoning tags from model responses before computing metrics.", rich_help_panel=HELP_PANEL_NAME_1, ), ] @@ -64,43 +82,73 @@ ReasoningTags = Annotated[ str, Option( - help="List of reasoning tags (provided as pairs) to remove from responses.", + help="List of reasoning tag pairs to remove from responses, formatted as a Python list of tuples.", rich_help_panel=HELP_PANEL_NAME_1, ), ] # Logging Parameters (HELP_PANEL_NAME_2) -OutputDir = Annotated[str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)] +OutputDir = Annotated[ + str, + Option( + help="Directory where evaluation results and details will be saved. Supports fsspec-compliant paths (local, s3, hf hub, etc.).", + rich_help_panel=HELP_PANEL_NAME_2, + ), +] ResultsPathTemplate = Annotated[ str | None, Option( - help="Template path for where to save the results, you have access to 3 variables, `output_dir`, `org` and `model`. for example a template can be `'{output_dir}/1234/{org}+{model}'`", + help="Custom template for results file path. Available variables: {output_dir}, {org}, {model}. Example: '{output_dir}/experiments/{org}_{model}' creates results in a subdirectory.", rich_help_panel=HELP_PANEL_NAME_2, ), ] -PushToHub = Annotated[bool, Option(help="Push results to the huggingface hub.", rich_help_panel=HELP_PANEL_NAME_2)] +PushToHub = Annotated[ + bool, + Option( + help="Whether to push evaluation results and details to the Hugging Face Hub. Requires --results-org to be set.", + rich_help_panel=HELP_PANEL_NAME_2, + ), +] -PushToTensorboard = Annotated[bool, Option(help="Push results to tensorboard.", rich_help_panel=HELP_PANEL_NAME_2)] +PushToTensorboard = Annotated[ + bool, + Option( + help="Whether to create and push TensorBoard logs to the Hugging Face Hub. Requires --results-org to be set.", + rich_help_panel=HELP_PANEL_NAME_2, + ), +] PublicRun = Annotated[ - bool, Option(help="Push results and details to a public repo.", rich_help_panel=HELP_PANEL_NAME_2) + bool, + Option( + help="Whether to make the uploaded results and details public on the Hugging Face Hub. If False, datasets will be private.", + rich_help_panel=HELP_PANEL_NAME_2, + ), ] ResultsOrg = Annotated[ - Optional[str], Option(help="Organization to push results to.", rich_help_panel=HELP_PANEL_NAME_2) + Optional[str], + Option( + help="Hugging Face organization where results will be pushed. Required when using --push-to-hub or --push-to-tensorboard.", + rich_help_panel=HELP_PANEL_NAME_2, + ), ] SaveDetails = Annotated[ - bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2) + bool, + Option( + help="Whether to save detailed per-sample results including model inputs, outputs, and metrics. 
Useful for analysis and debugging.", + rich_help_panel=HELP_PANEL_NAME_2, + ), ] Wandb = Annotated[ bool, Option( - help="Push results to wandb or trackio if available. We use env variable to configure trackio or wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/, https://github.com/gradio-app/trackio", + help="Whether to log results to Weights & Biases (wandb) or Trackio. Configure with environment variables: WANDB_PROJECT, WANDB_SPACE_ID, etc. See wandb docs for full configuration options.", rich_help_panel=HELP_PANEL_NAME_2, ), ] @@ -108,19 +156,34 @@ # Debug Parameters (HELP_PANEL_NAME_3) MaxSamples = Annotated[ - Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3) + Optional[int], + Option( + help="Maximum number of samples to evaluate per task. Useful for quick testing or debugging. If None, evaluates on all available samples.", + rich_help_panel=HELP_PANEL_NAME_3, + ), ] -JobId = Annotated[int, Option(help="Optional job id for future reference.", rich_help_panel=HELP_PANEL_NAME_3)] +JobId = Annotated[ + int, + Option( + help="Optional job identifier for tracking and organizing multiple evaluation runs. Useful in cluster environments.", + rich_help_panel=HELP_PANEL_NAME_3, + ), +] # Common argument patterns -Tasks = Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")] +Tasks = Annotated[ + str, + Argument( + help="Comma-separated list of tasks to evaluate. Format: 'task1,task2' or 'suite|task|version|split'. Use 'lighteval tasks list' to see available tasks." + ), +] ModelArgs = Annotated[ str, Argument( - help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" + help="Model configuration in key=value format (e.g., 'pretrained=model_name,device=cuda') or path to YAML config file. See examples/model_configs/ for template files." ), ] From 35c6c6610a15b4a0f663d2f95bfd3419b8ea22ca Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Fri, 22 Aug 2025 07:56:16 +0000 Subject: [PATCH 06/10] fix reasoning tags parsing --- src/lighteval/pipeline.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index c13fda1a0..3a904ffac 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -129,24 +129,24 @@ def __post_init__(self): # noqa C901 elif self.launcher_type == ParallelismManager.OPENAI: if not is_openai_available(): raise ImportError(NO_OPENAI_ERROR_MSG) - else: - # Convert reasoning tags to list if needed - if not isinstance(self.reasoning_tags, list): - try: - self.reasoning_tags = ast.literal_eval(self.reasoning_tags) - except ValueError as e: - raise ValueError( - "reasoning_tags must be a list of pair tuples, e.g. [('start_tag', 'end_tag'), ...]. " - f"Got {self.reasoning_tags} instead, which caused parsing error {e}." - ) - - # Make sure format is correct - if not all(isinstance(tag, tuple) and len(tag) == 2 for tag in self.reasoning_tags): + + # Convert reasoning tags to list if needed + if not isinstance(self.reasoning_tags, list): + try: + self.reasoning_tags = ast.literal_eval(self.reasoning_tags) + except ValueError as e: raise ValueError( "reasoning_tags must be a list of pair tuples, e.g. [('start_tag', 'end_tag'), ...]. " - f"Got {self.reasoning_tags} instead." + f"Got {self.reasoning_tags} instead, which caused parsing error {e}." 
) + # Make sure format is correct + if not all(isinstance(tag, tuple) and len(tag) == 2 for tag in self.reasoning_tags): + raise ValueError( + "reasoning_tags must be a list of pair tuples, e.g. [('start_tag', 'end_tag'), ...]. " + f"Got {self.reasoning_tags} instead." + ) + class Pipeline: def __init__( From c319160380ee7ba950902517d924c861513bff38 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Tue, 26 Aug 2025 09:35:29 +0000 Subject: [PATCH 07/10] update from suggestion --- src/lighteval/cli_args.py | 362 +++++++++++++++++-------------- src/lighteval/main_accelerate.py | 75 +++---- src/lighteval/main_baseline.py | 21 +- src/lighteval/main_custom.py | 61 +++--- src/lighteval/main_endpoint.py | 175 +++++++-------- src/lighteval/main_nanotron.py | 9 +- src/lighteval/main_sglang.py | 75 +++---- src/lighteval/main_tasks.py | 7 +- src/lighteval/main_vllm.py | 75 +++---- 9 files changed, 442 insertions(+), 418 deletions(-) diff --git a/src/lighteval/cli_args.py b/src/lighteval/cli_args.py index 31aefbe0d..472941ad7 100644 --- a/src/lighteval/cli_args.py +++ b/src/lighteval/cli_args.py @@ -25,7 +25,8 @@ This module exports pre-defined argument types to reduce redundancy across main_*.py files. """ -from typing import Optional +from dataclasses import dataclass +from typing import Any, Optional from typer import Argument, Option from typing_extensions import Annotated @@ -38,172 +39,213 @@ HELP_PANEL_NAME_4 = "Modeling Parameters" +@dataclass +class Arg: + """Base class for CLI arguments with type and default value.""" + + type: Annotated + default: Any + + # Common Parameters (HELP_PANEL_NAME_1) -DatasetLoadingProcesses = Annotated[ - int, - Option( - help="Number of parallel processes to use for loading datasets. Higher values can speed up dataset loading but use more memory.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] - -CustomTasks = Annotated[ - Optional[str], - Option( - help="Path to a Python file containing custom task definitions. The file should define a TASKS_TABLE with LightevalTaskConfig objects.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] - -NumFewshotSeeds = Annotated[ - int, - Option( - help="Number of different random seeds to use for few-shot evaluation. Each seed will generate different few-shot examples, providing more robust evaluation.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] - -LoadResponsesFromDetailsDateId = Annotated[ - Optional[str], - Option( - help="Load previously generated model responses from a specific evaluation run instead of running the model. Use the timestamp/date_id from a previous run's details directory.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] - -RemoveReasoningTags = Annotated[ - bool, - Option( - help="Whether to remove reasoning tags from model responses before computing metrics.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] - -ReasoningTags = Annotated[ - str, - Option( - help="List of reasoning tag pairs to remove from responses, formatted as a Python list of tuples.", - rich_help_panel=HELP_PANEL_NAME_1, - ), -] +dataset_loading_processes = Arg( + type=Annotated[ + int, + Option( + help="Number of parallel processes to use for loading datasets. Higher values can speed up dataset loading but use more memory.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default=1, +) + +custom_tasks = Arg( + type=Annotated[ + Optional[str], + Option( + help="Path to a Python file containing custom task definitions. 
The file should define a TASKS_TABLE with LightevalTaskConfig objects.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default=None, +) + +num_fewshot_seeds = Arg( + type=Annotated[ + int, + Option( + help="Number of different random seeds to use for few-shot evaluation. Each seed will generate different few-shot examples, providing more robust evaluation.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default=1, +) + +load_responses_from_details_date_id = Arg( + type=Annotated[ + Optional[str], + Option( + help="Load previously generated model responses from a specific evaluation run instead of running the model. Use the timestamp/date_id from a previous run's details directory.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default=None, +) + +remove_reasoning_tags = Arg( + type=Annotated[ + bool, + Option( + help="Whether to remove reasoning tags from model responses before computing metrics.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default=True, +) + +reasoning_tags = Arg( + type=Annotated[ + str, + Option( + help="List of reasoning tag pairs to remove from responses, formatted as a Python list of tuples.", + rich_help_panel=HELP_PANEL_NAME_1, + ), + ], + default="[('', '')]", +) # Logging Parameters (HELP_PANEL_NAME_2) -OutputDir = Annotated[ - str, - Option( - help="Directory where evaluation results and details will be saved. Supports fsspec-compliant paths (local, s3, hf hub, etc.).", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -ResultsPathTemplate = Annotated[ - str | None, - Option( - help="Custom template for results file path. Available variables: {output_dir}, {org}, {model}. Example: '{output_dir}/experiments/{org}_{model}' creates results in a subdirectory.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -PushToHub = Annotated[ - bool, - Option( - help="Whether to push evaluation results and details to the Hugging Face Hub. Requires --results-org to be set.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -PushToTensorboard = Annotated[ - bool, - Option( - help="Whether to create and push TensorBoard logs to the Hugging Face Hub. Requires --results-org to be set.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -PublicRun = Annotated[ - bool, - Option( - help="Whether to make the uploaded results and details public on the Hugging Face Hub. If False, datasets will be private.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -ResultsOrg = Annotated[ - Optional[str], - Option( - help="Hugging Face organization where results will be pushed. Required when using --push-to-hub or --push-to-tensorboard.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -SaveDetails = Annotated[ - bool, - Option( - help="Whether to save detailed per-sample results including model inputs, outputs, and metrics. Useful for analysis and debugging.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] - -Wandb = Annotated[ - bool, - Option( - help="Whether to log results to Weights & Biases (wandb) or Trackio. Configure with environment variables: WANDB_PROJECT, WANDB_SPACE_ID, etc. See wandb docs for full configuration options.", - rich_help_panel=HELP_PANEL_NAME_2, - ), -] +output_dir = Arg( + type=Annotated[ + str, + Option( + help="Directory where evaluation results and details will be saved. Supports fsspec-compliant paths (local, s3, hf hub, etc.).", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default="results", +) + +results_path_template = Arg( + type=Annotated[ + str | None, + Option( + help="Custom template for results file path. 
Available variables: {output_dir}, {org}, {model}. Example: '{output_dir}/experiments/{org}_{model}' creates results in a subdirectory.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=None, +) + +push_to_hub = Arg( + type=Annotated[ + bool, + Option( + help="Whether to push evaluation results and details to the Hugging Face Hub. Requires --results-org to be set.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=False, +) + +push_to_tensorboard = Arg( + type=Annotated[ + bool, + Option( + help="Whether to create and push TensorBoard logs to the Hugging Face Hub. Requires --results-org to be set.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=False, +) + +public_run = Arg( + type=Annotated[ + bool, + Option( + help="Whether to make the uploaded results and details public on the Hugging Face Hub. If False, datasets will be private.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=False, +) + +results_org = Arg( + type=Annotated[ + Optional[str], + Option( + help="Hugging Face organization where results will be pushed. Required when using --push-to-hub or --push-to-tensorboard.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=None, +) + +save_details = Arg( + type=Annotated[ + bool, + Option( + help="Whether to save detailed per-sample results including model inputs, outputs, and metrics. Useful for analysis and debugging.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=False, +) + +wandb = Arg( + type=Annotated[ + bool, + Option( + help="Whether to log results to Weights & Biases (wandb) or Trackio. Configure with environment variables: WANDB_PROJECT, WANDB_SPACE_ID, etc. See wandb docs for full configuration options.", + rich_help_panel=HELP_PANEL_NAME_2, + ), + ], + default=False, +) # Debug Parameters (HELP_PANEL_NAME_3) -MaxSamples = Annotated[ - Optional[int], - Option( - help="Maximum number of samples to evaluate per task. Useful for quick testing or debugging. If None, evaluates on all available samples.", - rich_help_panel=HELP_PANEL_NAME_3, - ), -] - -JobId = Annotated[ - int, - Option( - help="Optional job identifier for tracking and organizing multiple evaluation runs. Useful in cluster environments.", - rich_help_panel=HELP_PANEL_NAME_3, - ), -] +max_samples = Arg( + type=Annotated[ + Optional[int], + Option( + help="Maximum number of samples to evaluate per task. Useful for quick testing or debugging. If None, evaluates on all available samples.", + rich_help_panel=HELP_PANEL_NAME_3, + ), + ], + default=None, +) + +job_id = Arg( + type=Annotated[ + int, + Option( + help="Optional job identifier for tracking and organizing multiple evaluation runs. Useful in cluster environments.", + rich_help_panel=HELP_PANEL_NAME_3, + ), + ], + default=0, +) # Common argument patterns -Tasks = Annotated[ - str, - Argument( - help="Comma-separated list of tasks to evaluate. Format: 'task1,task2' or 'suite|task|version|split'. Use 'lighteval tasks list' to see available tasks." - ), -] - -ModelArgs = Annotated[ - str, - Argument( - help="Model configuration in key=value format (e.g., 'pretrained=model_name,device=cuda') or path to YAML config file. See examples/model_configs/ for template files." 
- ), -] - - -# Default values for common arguments -DEFAULT_VALUES = { - "dataset_loading_processes": 1, - "custom_tasks": None, - "num_fewshot_seeds": 1, - "load_responses_from_details_date_id": None, - "remove_reasoning_tags": True, - "reasoning_tags": "[('', '')]", - "output_dir": "results", - "results_path_template": None, - "push_to_hub": False, - "push_to_tensorboard": False, - "public_run": False, - "results_org": None, - "save_details": False, - "wandb": False, - "max_samples": None, - "job_id": 0, -} +tasks = Arg( + type=Annotated[ + str, + Argument( + help="Comma-separated list of tasks to evaluate. Format: 'task1,task2' or 'suite|task|version|split'. Use 'lighteval tasks list' to see available tasks." + ), + ], + default=None, # Required argument, no default +) + +model_args = Arg( + type=Annotated[ + str, + Argument( + help="Model configuration in key=value format (e.g., 'pretrained=model_name,device=cuda') or path to YAML config file. See examples/model_configs/ for template files." + ), + ], + default=None, # Required argument, no default +) diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index ab9d74aa5..1e5726f86 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -26,26 +26,25 @@ from typing_extensions import Annotated from lighteval.cli_args import ( - DEFAULT_VALUES, HELP_PANEL_NAME_4, - CustomTasks, - DatasetLoadingProcesses, - JobId, - LoadResponsesFromDetailsDateId, - MaxSamples, - ModelArgs, - NumFewshotSeeds, - OutputDir, - PublicRun, - PushToHub, - PushToTensorboard, - ReasoningTags, - RemoveReasoningTags, - ResultsOrg, - ResultsPathTemplate, - SaveDetails, - Tasks, - Wandb, + custom_tasks, + dataset_loading_processes, + job_id, + load_responses_from_details_date_id, + max_samples, + model_args, + num_fewshot_seeds, + output_dir, + public_run, + push_to_hub, + push_to_tensorboard, + reasoning_tags, + remove_reasoning_tags, + results_org, + results_path_template, + save_details, + tasks, + wandb, ) @@ -54,32 +53,30 @@ def accelerate( # noqa C901 # === general === - model_args: ModelArgs, - tasks: Tasks, + model_args: model_args.type, + tasks: tasks.type, # === Common parameters === vision_model: Annotated[ bool, Option(help="Use vision model for evaluation.", rich_help_panel=HELP_PANEL_NAME_4) ] = False, - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub 
= DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using accelerate and transformers as backend. diff --git a/src/lighteval/main_baseline.py b/src/lighteval/main_baseline.py index 035cad276..7d4d34248 100644 --- a/src/lighteval/main_baseline.py +++ b/src/lighteval/main_baseline.py @@ -22,21 +22,20 @@ from lighteval.cli_args import ( - DEFAULT_VALUES, - CustomTasks, - DatasetLoadingProcesses, - MaxSamples, - OutputDir, - Tasks, + custom_tasks, + dataset_loading_processes, + max_samples, + output_dir, + tasks, ) def baseline( - tasks: Tasks, - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], + tasks: tasks.type, + custom_tasks: custom_tasks.type = custom_tasks.default, + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + output_dir: output_dir.type = output_dir.default, + max_samples: max_samples.type = max_samples.default, ): """ Compute baselines for given tasks. 
diff --git a/src/lighteval/main_custom.py b/src/lighteval/main_custom.py index d2152b585..14507ae8d 100644 --- a/src/lighteval/main_custom.py +++ b/src/lighteval/main_custom.py @@ -26,22 +26,21 @@ from typing_extensions import Annotated from lighteval.cli_args import ( - DEFAULT_VALUES, - CustomTasks, - DatasetLoadingProcesses, - JobId, - MaxSamples, - NumFewshotSeeds, - OutputDir, - PublicRun, - PushToHub, - PushToTensorboard, - ReasoningTags, - RemoveReasoningTags, - ResultsOrg, - ResultsPathTemplate, - SaveDetails, - Tasks, + custom_tasks, + dataset_loading_processes, + job_id, + max_samples, + num_fewshot_seeds, + output_dir, + public_run, + push_to_hub, + push_to_tensorboard, + reasoning_tags, + remove_reasoning_tags, + results_org, + results_path_template, + save_details, + tasks, ) from lighteval.models.custom.custom_model import CustomModelConfig @@ -54,24 +53,24 @@ def custom( # === general === model_name: Annotated[str, Argument(help="The model name to evaluate")], model_definition_file_path: Annotated[str, Argument(help="The model definition file path to evaluate")], - tasks: Tasks, + tasks: tasks.type, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate custom models (can be anything). 
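The reasoning-tag handling that PATCH 06 hoists out of the launcher-specific branch boils down to: accept a ready-made list or its string form, parse the string with ast.literal_eval, then validate that the result is a list of 2-tuples. A standalone sketch follows; parse_reasoning_tags is an illustration rather than lighteval API, and the empty-looking ('', '') pair shown throughout this series is assumed to be <think>/</think> whose angle-bracketed tags were lost to HTML rendering.

import ast


def parse_reasoning_tags(reasoning_tags):
    """Normalize a string such as "[('<think>', '</think>')]" into a list of tag pairs."""
    if not isinstance(reasoning_tags, list):
        try:
            reasoning_tags = ast.literal_eval(reasoning_tags)
        except (ValueError, SyntaxError) as e:
            # The hunk above catches ValueError only; ast.literal_eval can
            # also raise SyntaxError on unparseable input, so both are caught.
            raise ValueError(
                "reasoning_tags must be a list of pair tuples, e.g. "
                f"[('start_tag', 'end_tag'), ...]. Got {reasoning_tags!r} ({e})."
            )
    if not all(isinstance(tag, tuple) and len(tag) == 2 for tag in reasoning_tags):
        raise ValueError(f"reasoning_tags must be a list of pair tuples, got {reasoning_tags!r}.")
    return reasoning_tags


print(parse_reasoning_tags("[('<think>', '</think>')]"))  # [('<think>', '</think>')]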
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 08c9ee33c..8b2c7602f 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -26,25 +26,24 @@ from typing_extensions import Annotated from lighteval.cli_args import ( - DEFAULT_VALUES, HELP_PANEL_NAME_4, - CustomTasks, - DatasetLoadingProcesses, - JobId, - LoadResponsesFromDetailsDateId, - MaxSamples, - NumFewshotSeeds, - OutputDir, - PublicRun, - PushToHub, - PushToTensorboard, - ReasoningTags, - RemoveReasoningTags, - ResultsOrg, - ResultsPathTemplate, - SaveDetails, - Tasks, - Wandb, + custom_tasks, + dataset_loading_processes, + job_id, + load_responses_from_details_date_id, + max_samples, + num_fewshot_seeds, + output_dir, + public_run, + push_to_hub, + push_to_tensorboard, + reasoning_tags, + remove_reasoning_tags, + results_org, + results_path_template, + save_details, + tasks, + wandb, ) @@ -57,7 +56,7 @@ def inference_endpoint( model_config_path: Annotated[ str, Argument(help="Path to model config yaml file. (examples/model_configs/endpoint_model.yaml)") ], - tasks: Tasks, + tasks: tasks.type, free_endpoint: Annotated[ bool, Option( @@ -66,26 +65,24 @@ def inference_endpoint( ), ] = False, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using inference-endpoints as backend. 
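Looking back at PATCHes 03 and 04: the intermediate commit put a list literal directly on the PipelineParameters dataclass field, which dataclasses reject at class-creation time as a mutable default; the follow-up therefore stores the default as a string and leaves parsing to __post_init__. A minimal reproduction of the failure and the two usual fixes, assuming Python 3.10+ (the class names here are illustrative only):

from dataclasses import dataclass, field

try:

    @dataclass
    class Broken:
        # What PATCH 03 attempted: dataclasses raise ValueError here because
        # a shared list default would be mutated across instances.
        reasoning_tags: list[tuple[str, str]] = [("<think>", "</think>")]

except ValueError as e:
    print(e)  # mutable default <class 'list'> for field reasoning_tags is not allowed: use default_factory


@dataclass
class StringDefault:
    # PATCH 04's approach: keep an immutable string, parse it in __post_init__.
    reasoning_tags: str | list[tuple[str, str]] = "[('<think>', '</think>')]"


@dataclass
class FactoryDefault:
    # The standard alternative: build a fresh list per instance.
    reasoning_tags: list[tuple[str, str]] = field(default_factory=lambda: [("<think>", "</think>")])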
@@ -147,28 +144,26 @@ def tgi( model_config_path: Annotated[ str, Argument(help="Path to model config yaml file. (examples/model_configs/tgi_model.yaml)") ], - tasks: Tasks, + tasks: tasks.type, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using TGI as backend. 
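Since the results_path_template help above names its three variables, its expansion is worth one concrete line. The str.format call below mirrors the documented behaviour but is not lifted from lighteval's source, and the org/model names are made up:

results_path_template = "{output_dir}/experiments/{org}_{model}"

path = results_path_template.format(
    output_dir="results",
    org="meta-llama",  # illustrative org and model names
    model="Llama-3.1-8B-Instruct",
)
print(path)  # results/experiments/meta-llama_Llama-3.1-8B-Instruct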
@@ -237,28 +232,26 @@ def litellm( help="config file path for the litellm model, or a comma separated string of model args (model_name={},base_url={},provider={})" ), ], - tasks: Tasks, + tasks: tasks.type, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using LiteLLM as backend. 
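The model-args help strings in these hunks all describe the same key1=value1,key2=value2 convention. The sketch below shows how such a string maps onto a dict; parse_model_args is a hypothetical helper, not lighteval's internal parser, and a naive comma split like this would mis-handle nested values such as generation={temperature: 0.6}:

def parse_model_args(model_args: str) -> dict[str, str]:
    # A path to a YAML config is passed through untouched.
    if model_args.endswith((".yaml", ".yml")):
        return {"config_path": model_args}
    # Flat key=value pairs only; nested values would need a real parser.
    return dict(pair.split("=", 1) for pair in model_args.split(","))


print(parse_model_args("pretrained=model_name,device=cuda"))
# {'pretrained': 'model_name', 'device': 'cuda'}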
@@ -331,25 +324,25 @@ def inference_providers( help="config file path for the inference provider model, or a comma separated string of model args (model_name={},provider={},generation={temperature: 0.6})" ), ], - tasks: Tasks, + tasks: tasks.type, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using HuggingFace's inference providers as backend. diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 936220331..06935e69c 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -29,9 +29,8 @@ from yaml import SafeLoader from lighteval.cli_args import ( - DEFAULT_VALUES, - ReasoningTags, - RemoveReasoningTags, + reasoning_tags, + remove_reasoning_tags, ) @@ -43,8 +42,8 @@ def nanotron( str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") ], lighteval_config_path: Annotated[str, Option(help="Path to a YAML config to be used for the evaluation.")], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, ): """ Evaluate models using nanotron as backend. diff --git a/src/lighteval/main_sglang.py b/src/lighteval/main_sglang.py index 89867fd84..135396263 100644 --- a/src/lighteval/main_sglang.py +++ b/src/lighteval/main_sglang.py @@ -21,53 +21,50 @@ # SOFTWARE. 
from lighteval.cli_args import ( - DEFAULT_VALUES, - CustomTasks, - DatasetLoadingProcesses, - JobId, - LoadResponsesFromDetailsDateId, - MaxSamples, - ModelArgs, - NumFewshotSeeds, - OutputDir, - PublicRun, - PushToHub, - PushToTensorboard, - ReasoningTags, - RemoveReasoningTags, - ResultsOrg, - ResultsPathTemplate, - SaveDetails, - Tasks, - Wandb, + custom_tasks, + dataset_loading_processes, + job_id, + load_responses_from_details_date_id, + max_samples, + model_args, + num_fewshot_seeds, + output_dir, + public_run, + push_to_hub, + push_to_tensorboard, + reasoning_tags, + remove_reasoning_tags, + results_org, + results_path_template, + save_details, + tasks, + wandb, ) def sglang( # === general === - model_args: ModelArgs, - tasks: Tasks, + model_args: model_args.type, + tasks: tasks.type, # === Common parameters === - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using sglang as backend. diff --git a/src/lighteval/main_tasks.py b/src/lighteval/main_tasks.py index 80255a45a..1a283b057 100644 --- a/src/lighteval/main_tasks.py +++ b/src/lighteval/main_tasks.py @@ -20,12 +20,13 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import logging -from typing import Optional import typer from typer import Argument, Option from typing_extensions import Annotated +from lighteval.cli_args import custom_tasks + app = typer.Typer() @@ -33,7 +34,7 @@ @app.command() def inspect( tasks: Annotated[str, Argument(help="Id of tasks or path to a text file with a list of tasks")], - custom_tasks: Annotated[Optional[str], Option(help="Path to a file with custom tasks")] = None, + custom_tasks: custom_tasks.type = custom_tasks.default, num_samples: Annotated[int, Option(help="Number of samples to display")] = 10, show_config: Annotated[bool, Option(help="Will display the full task config")] = False, ): @@ -65,7 +66,7 @@ def inspect( @app.command() -def list(custom_tasks: Annotated[Optional[str], Option(help="Path to a file with custom tasks")] = None): +def list(custom_tasks: custom_tasks.type = custom_tasks.default): """ List all tasks """ diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 7f381f19b..45e40fd70 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -26,57 +26,54 @@ from typing_extensions import Annotated from lighteval.cli_args import ( - DEFAULT_VALUES, HELP_PANEL_NAME_4, - CustomTasks, - DatasetLoadingProcesses, - JobId, - LoadResponsesFromDetailsDateId, - MaxSamples, - ModelArgs, - NumFewshotSeeds, - OutputDir, - PublicRun, - PushToHub, - PushToTensorboard, - ReasoningTags, - RemoveReasoningTags, - ResultsOrg, - ResultsPathTemplate, - SaveDetails, - Tasks, - Wandb, + custom_tasks, + dataset_loading_processes, + job_id, + load_responses_from_details_date_id, + max_samples, + model_args, + num_fewshot_seeds, + output_dir, + public_run, + push_to_hub, + push_to_tensorboard, + reasoning_tags, + remove_reasoning_tags, + results_org, + results_path_template, + save_details, + tasks, + wandb, ) def vllm( # === general === - model_args: ModelArgs, - tasks: Tasks, + model_args: model_args.type, + tasks: tasks.type, # === Common parameters === cot_prompt: Annotated[ Optional[str], Option(help="Use chain of thought prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4) ] = None, - dataset_loading_processes: DatasetLoadingProcesses = DEFAULT_VALUES["dataset_loading_processes"], - custom_tasks: CustomTasks = DEFAULT_VALUES["custom_tasks"], - num_fewshot_seeds: NumFewshotSeeds = DEFAULT_VALUES["num_fewshot_seeds"], - load_responses_from_details_date_id: LoadResponsesFromDetailsDateId = DEFAULT_VALUES[ - "load_responses_from_details_date_id" - ], - remove_reasoning_tags: RemoveReasoningTags = DEFAULT_VALUES["remove_reasoning_tags"], - reasoning_tags: ReasoningTags = DEFAULT_VALUES["reasoning_tags"], + dataset_loading_processes: dataset_loading_processes.type = dataset_loading_processes.default, + custom_tasks: custom_tasks.type = custom_tasks.default, + num_fewshot_seeds: num_fewshot_seeds.type = num_fewshot_seeds.default, + load_responses_from_details_date_id: load_responses_from_details_date_id.type = load_responses_from_details_date_id.default, + remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, + reasoning_tags: reasoning_tags.type = reasoning_tags.default, # === saving === - output_dir: OutputDir = DEFAULT_VALUES["output_dir"], - results_path_template: ResultsPathTemplate = DEFAULT_VALUES["results_path_template"], - push_to_hub: PushToHub = DEFAULT_VALUES["push_to_hub"], - push_to_tensorboard: PushToTensorboard = DEFAULT_VALUES["push_to_tensorboard"], - public_run: PublicRun = DEFAULT_VALUES["public_run"], - results_org: ResultsOrg = 
DEFAULT_VALUES["results_org"], - save_details: SaveDetails = DEFAULT_VALUES["save_details"], - wandb: Wandb = DEFAULT_VALUES["wandb"], + output_dir: output_dir.type = output_dir.default, + results_path_template: results_path_template.type = results_path_template.default, + push_to_hub: push_to_hub.type = push_to_hub.default, + push_to_tensorboard: push_to_tensorboard.type = push_to_tensorboard.default, + public_run: public_run.type = public_run.default, + results_org: results_org.type = results_org.default, + save_details: save_details.type = save_details.default, + wandb: wandb.type = wandb.default, # === debug === - max_samples: MaxSamples = DEFAULT_VALUES["max_samples"], - job_id: JobId = DEFAULT_VALUES["job_id"], + max_samples: max_samples.type = max_samples.default, + job_id: job_id.type = job_id.default, ): """ Evaluate models using vllm as backend. From 776219e94afcc3e000c677bd06ea0ad374d08ae3 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Tue, 26 Aug 2025 09:58:49 +0000 Subject: [PATCH 08/10] styling --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e50483f1e..bd4dfbca9 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,10 @@ style: - ruff format . - ruff check --fix . + uvx ruff format . + uvx ruff check --fix . quality: - ruff format --check . - ruff check . + uvx ruff format --check . + uvx ruff check . From e29ba876bbcf3ae2ad6d03b1879d6769fcfb8050 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Tue, 26 Aug 2025 10:01:26 +0000 Subject: [PATCH 09/10] styling --- src/lighteval/main_tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lighteval/main_tasks.py b/src/lighteval/main_tasks.py index 4fc7d59e8..706dd1a06 100644 --- a/src/lighteval/main_tasks.py +++ b/src/lighteval/main_tasks.py @@ -67,9 +67,9 @@ def inspect( @app.command() def list( - custom_tasks: custom_tasks.type = custom_tasks.default + custom_tasks: custom_tasks.type = custom_tasks.default, suites: Annotated[ - Optional[str], + str | None, Option( help="Comma-separated list of suites to display (e.g., 'helm,harness'). Use 'all' for all suites. If not specified, shows core suites only." ), From 87a8b5e11bcc03f1da29002f8fb43e4cca336ba1 Mon Sep 17 00:00:00 2001 From: Nathan Habib Date: Tue, 26 Aug 2025 10:19:03 +0000 Subject: [PATCH 10/10] styling --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bd4dfbca9..e50483f1e 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,10 @@ style: - uvx ruff format . - uvx ruff check --fix . + ruff format . + ruff check --fix . quality: - uvx ruff format --check . - uvx ruff check . + ruff format --check . + ruff check .