Merged
15 changes: 15 additions & 0 deletions src/lighteval/main_tasks.py
@@ -19,6 +19,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import json
import logging

import typer
@@ -92,3 +93,17 @@ def create(template: str, task_name: str, dataset_name: str):
f.write(content)

logger.info(f"Task created in custom_{task_name}_task.py")


@app.command()
def dump(
load_tasks_multilingual: load_tasks_multilingual.type = load_tasks_multilingual.default,
custom_tasks: custom_tasks.type = custom_tasks.default,
):
"""Dump all task names, metadata, and docstrings as JSON"""
from lighteval.tasks.registry import Registry

registry = Registry(custom_tasks=custom_tasks, load_multilingual=load_tasks_multilingual)
modules_data = registry.get_tasks_dump()

print(json.dumps(modules_data, indent=2, default=str))
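
For readers of this diff, a minimal consumption sketch of the new command follows. It assumes the typer sub-app in main_tasks.py is exposed as `lighteval tasks` (so the command would be invoked as `lighteval tasks dump`); the only thing relied on is the JSON shape produced by `Registry.get_tasks_dump()` further down in this PR.

import json
import subprocess

# Hypothetical invocation: run the new command and parse its JSON output.
result = subprocess.run(
    ["lighteval", "tasks", "dump"],
    capture_output=True,
    text=True,
    check=True,
)
# Each entry has the shape produced by get_tasks_dump():
# {"module": ..., "docstring": {...}, "tasks": [{"name": ..., "config": {...}}, ...]}
for entry in json.loads(result.stdout):
    print(f"{entry['module']}: {len(entry['tasks'])} task(s)")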
6 changes: 1 addition & 5 deletions src/lighteval/tasks/multilingual/tasks/global_mmlu.py
@@ -35,8 +35,6 @@
from lighteval.tasks.multilingual.utils.task_utils import get_metrics_for_formulation
from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
from lighteval.tasks.templates.utils.formulation import (
-CFFormulation,
-HybridFormulation,
MCFFormulation,
)
from lighteval.utils.language import Language
@@ -176,8 +174,6 @@
]
for formulation in [
MCFFormulation(),
-CFFormulation(),
-HybridFormulation(),
]
-for sensitivity_label in ["ALL", "CA", "CS", "UNK"]
+for sensitivity_label in ["ALL"]
]
127 changes: 127 additions & 0 deletions src/lighteval/tasks/registry.py
@@ -25,10 +25,12 @@
import copy
import importlib
import importlib.util
import inspect
import logging
import os
import sys
import time
from dataclasses import asdict
from functools import lru_cache
from itertools import groupby
from pathlib import Path
@@ -149,6 +151,9 @@ def __init__(
else:
self.tasks_list = self._get_full_task_list_from_input_string(tasks)

self._load_multilingual = load_multilingual
self._custom_tasks = custom_tasks

self._task_registry = Registry.load_all_task_configs(
custom_tasks=custom_tasks, load_multilingual=load_multilingual
)
@@ -433,3 +438,125 @@ def print_all_tasks(self, suites: str | None = None):
# Print summary
total_tasks = len([t for t in tasks_names if t.split("|")[1]])
print(f"\nTotal tasks displayed: {total_tasks}")

def get_tasks_dump(self) -> list[dict]: # noqa: C901
"""Get all task names, metadata, and docstrings as a Python object.

Returns:
list[dict]: List of dictionaries, each containing:
- module: Module name
- docstring: Parsed docstring as dict
- tasks: List of task configs for this module
"""
task_configs = self._task_registry

TASKS_DIR = Path(__file__).parent / "tasks"
TASKS_DIR_MULTILINGUAL = Path(__file__).parent / "multilingual" / "tasks"

task_files = [f for f in TASKS_DIR.glob("*.py") if f.name != "__init__.py"]
task_files_multilingual = [f for f in TASKS_DIR_MULTILINGUAL.glob("*.py") if f.name != "__init__.py"]
task_subdirs = [d for d in TASKS_DIR.iterdir() if d.is_dir() and (d / "main.py").exists()]

module_to_docstring = {}

for task_file in task_files:
module_name = task_file.stem
module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}")
docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
module_to_docstring[module] = docstring

if self._load_multilingual:
for task_file in task_files_multilingual:
module_name = task_file.stem
module = importlib.import_module(f"lighteval.tasks.multilingual.tasks.{module_name}")
docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
module_to_docstring[module] = docstring

for task_dir in task_subdirs:
module_name = task_dir.name
module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}.main")
docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
module_to_docstring[module] = docstring

if self._custom_tasks is not None:
custom_tasks_module = Registry.create_custom_tasks_module(self._custom_tasks)
docstring = (inspect.getdoc(custom_tasks_module) or custom_tasks_module.__doc__ or "").strip()
module_to_docstring[custom_tasks_module] = docstring

module_to_task_names = {}
for module, docstring in module_to_docstring.items():
if hasattr(module, "TASKS_TABLE"):
task_names_in_module = []
for config in getattr(module, "TASKS_TABLE"):
if config.name in task_configs:
task_names_in_module.append(config.name)
if task_names_in_module:
module_to_task_names[module] = task_names_in_module

def parse_docstring(docstring: str) -> dict: # noqa: C901
"""Parse a structured docstring into a JSON object.

Expected format:
key:
value

key2:
value2

Fields 'dataset', 'languages', and 'tags' are parsed as lists if comma-separated.
"""
if not docstring:
return {}

parsed = {}
lines = docstring.split("\n")
current_key = None
current_value = []

list_fields = {"dataset", "languages", "tags"}

def process_current_key_value(current_key, current_value, list_fields, parsed):
if current_key and current_value:
value = "\n".join(current_value).strip()
if current_key in list_fields:
if "," in value:
parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
else:
parsed[current_key] = [value] if value else []
else:
parsed[current_key] = value

for line in lines:
line = line.strip()
if not line:
process_current_key_value(current_key, current_value, list_fields, parsed)
current_value = []
continue

if line.endswith(":"):
process_current_key_value(current_key, current_value, list_fields, parsed)
current_key = line[:-1].strip()
current_value = []
else:
if current_key:
current_value.append(line)

process_current_key_value(current_key, current_value, list_fields, parsed)
return parsed

modules_data = []
for module, task_names in module_to_task_names.items():
docstring_raw = module_to_docstring.get(module, "")
docstring_parsed = parse_docstring(docstring_raw)
module_name = getattr(module, "__name__", str(module))

tasks_in_module = []
for task_name in task_names:
config = task_configs[task_name]
config_dict = asdict(config)
config_dict = {k: v.__str__() for k, v in config_dict.items()}
tasks_in_module.append({"name": task_name, "config": config_dict})

modules_data.append({"module": module_name, "docstring": docstring_parsed, "tasks": tasks_in_module})

return modules_data
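
To make the docstring convention that parse_docstring handles concrete, here is a small illustrative sketch; the keys mirror the module docstring format used by the task files (see the dataset: block in aimo.py below), while the names and values are hypothetical placeholders.

# Illustrative input in the expected convention: a line ending in ":" opens a
# key, the following non-blank lines are its value, and a blank line closes it.
example_docstring = """
name:
Example Benchmark

dataset:
my_org/my_dataset

languages:
en, fr, de

abstract:
A short description spanning
multiple lines.
"""
# Parsing this with parse_docstring would yield roughly:
# {
#     "name": "Example Benchmark",
#     "dataset": ["my_org/my_dataset"],   # list field with a single value
#     "languages": ["en", "fr", "de"],    # list field, comma-separated
#     "abstract": "A short description spanning\nmultiple lines.",
# }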
2 changes: 1 addition & 1 deletion src/lighteval/tasks/tasks/aimo.py
@@ -3,7 +3,7 @@
AIMO Progress Prize 1

dataset:
-https://www.kaggle.com/competitions/ai-mathematical-olympiad-prize
+lighteval/aimo_progress_prize_1

abstract:
Task to evaluate LLMs on the training set of the Kaggle AIMO competition: