From d7c370566be314a3a4b54beedff572096fdd33e3 Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib19@gmail.com>
Date: Wed, 12 Nov 2025 16:50:36 +0100
Subject: [PATCH 1/7] add a task dump in registry for better documentation of
 tasks

---
 src/lighteval/main_tasks.py                   |  15 ++
 .../tasks/multilingual/tasks/global_mmlu.py   |   6 +-
 src/lighteval/tasks/registry.py               | 162 +++++++++++++++++-
 3 files changed, 176 insertions(+), 7 deletions(-)

diff --git a/src/lighteval/main_tasks.py b/src/lighteval/main_tasks.py
index 8286d13f1..fb71b1b77 100644
--- a/src/lighteval/main_tasks.py
+++ b/src/lighteval/main_tasks.py
@@ -19,6 +19,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+import json
 import logging
 
 import typer
@@ -92,3 +93,17 @@ def create(template: str, task_name: str, dataset_name: str):
         f.write(content)
 
     logger.info(f"Task created in custom_{task_name}_task.py")
+
+
+@app.command()
+def dump(
+    load_tasks_multilingual: load_tasks_multilingual.type = load_tasks_multilingual.default,
+    custom_tasks: custom_tasks.type = custom_tasks.default,
+):
+    """Print the registry's task dump (modules, parsed docstrings, task configs) to stdout as JSON."""
+    from lighteval.tasks.registry import Registry
+
+    registry = Registry(custom_tasks=custom_tasks, load_multilingual=load_tasks_multilingual)
+    modules_data = registry.get_tasks_dump()
+
+    print(json.dumps(modules_data, indent=2, default=str))  # default=str: stringify non-JSON values
diff --git a/src/lighteval/tasks/multilingual/tasks/global_mmlu.py b/src/lighteval/tasks/multilingual/tasks/global_mmlu.py
index e9fdd2b04..54d9e8a71 100644
--- a/src/lighteval/tasks/multilingual/tasks/global_mmlu.py
+++ b/src/lighteval/tasks/multilingual/tasks/global_mmlu.py
@@ -35,8 +35,6 @@
 from lighteval.tasks.multilingual.utils.task_utils import get_metrics_for_formulation
 from lighteval.tasks.templates.multichoice import get_mcq_prompt_function
 from lighteval.tasks.templates.utils.formulation import (
-    CFFormulation,
-    HybridFormulation,
     MCFFormulation,
 )
 from lighteval.utils.language import Language
@@ -176,8 +174,6 @@
     ]
     for formulation in [
         MCFFormulation(),
-        CFFormulation(),
-        HybridFormulation(),
     ]
-    for sensitivity_label in ["ALL", "CA", "CS", "UNK"]
+    for sensitivity_label in ["ALL"]
 ]
diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index 324a38680..5ce5f1017 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -25,14 +25,16 @@
 import copy
 import importlib
 import importlib.util
+import inspect
 import logging
 import os
 import sys
 import time
-from functools import lru_cache
+from dataclasses import asdict
+from functools import lru_cache, partial
 from itertools import groupby
 from pathlib import Path
-from types import ModuleType
+from types import FunctionType, ModuleType
 
 from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
 
@@ -149,6 +151,9 @@ def __init__(
         else:
             self.tasks_list = self._get_full_task_list_from_input_string(tasks)
 
+        self._load_multilingual = load_multilingual
+        self._custom_tasks = custom_tasks
+
         self._task_registry = Registry.load_all_task_configs(
             custom_tasks=custom_tasks, load_multilingual=load_multilingual
         )
@@ -433,3 +438,156 @@ def print_all_tasks(self, suites: str | None = None):
         # Print summary
         total_tasks = len([t for t in tasks_names if t.split("|")[1]])
         print(f"\nTotal tasks displayed: {total_tasks}")
+
+    def get_tasks_dump(self) -> list[dict]:  # noqa: C901
+        """Get all task names, metadata, and docstrings as a Python object.
+
+        Returns:
+            list[dict]: List of dictionaries, each containing:
+                - module: Module name
+                - docstring: Parsed docstring as dict
+                - tasks: List of {"name": ..., "config": ...} dicts, one per task in this module
+        """
+        task_configs = self._task_registry
+
+        TASKS_DIR = Path(__file__).parent / "tasks"
+        TASKS_DIR_MULTILINGUAL = Path(__file__).parent / "multilingual" / "tasks"
+        TASKS_DIR_SUBDIRS = Path(__file__).parent / "tasks"
+
+        task_files = [f for f in TASKS_DIR.glob("*.py") if f.name != "__init__.py"]
+        task_files_multilingual = [f for f in TASKS_DIR_MULTILINGUAL.glob("*.py") if f.name != "__init__.py"]
+        task_subdirs = [d for d in TASKS_DIR_SUBDIRS.iterdir() if d.is_dir() and (d / "main.py").exists()]
+
+        module_to_docstring = {}
+
+        for task_file in task_files:
+            module_name = task_file.stem
+            module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}")
+            docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+            module_to_docstring[module] = docstring
+
+        if self._load_multilingual:
+            for task_file in task_files_multilingual:
+                module_name = task_file.stem
+                module = importlib.import_module(f"lighteval.tasks.multilingual.tasks.{module_name}")
+                docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+                module_to_docstring[module] = docstring
+
+        for task_dir in task_subdirs:
+            module_name = task_dir.name
+            module = importlib.import_module(f"lighteval.tasks.tasks.{module_name}.main")
+            docstring = (inspect.getdoc(module) or module.__doc__ or "").strip()
+            module_to_docstring[module] = docstring
+
+        if self._custom_tasks is not None:
+            custom_tasks_module = Registry.create_custom_tasks_module(self._custom_tasks)
+            docstring = (inspect.getdoc(custom_tasks_module) or custom_tasks_module.__doc__ or "").strip()
+            module_to_docstring[custom_tasks_module] = docstring
+
+        config_to_module = {}
+        module_to_task_names = {}
+        for module, docstring in module_to_docstring.items():
+            if hasattr(module, "TASKS_TABLE"):
+                task_names_in_module = []
+                for config in getattr(module, "TASKS_TABLE"):
+                    config_to_module[config.name] = module
+                    if config.name in task_configs:
+                        task_names_in_module.append(config.name)
+                if task_names_in_module:
+                    module_to_task_names[module] = task_names_in_module
+
+        def parse_docstring(docstring: str) -> dict:  # noqa: C901
+            """Parse a structured docstring into a JSON object.
+
+            Expected format:
+            key:
+            value
+
+            key2:
+            value2
+
+            Fields 'dataset', 'languages', and 'tags' are always returned as lists (split on commas).
+            """
+            if not docstring:
+                return {}
+
+            parsed = {}
+            lines = docstring.split("\n")
+            current_key = None
+            current_value = []
+
+            list_fields = {"dataset", "languages", "tags"}
+
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    if current_key and current_value:
+                        value = "\n".join(current_value).strip()
+                        if current_key in list_fields:
+                            if "," in value:
+                                parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
+                            else:
+                                parsed[current_key] = [value] if value else []
+                        else:
+                            parsed[current_key] = value
+                        current_value = []
+                    continue
+
+                if line.endswith(":"):
+                    if current_key and current_value:
+                        value = "\n".join(current_value).strip()
+                        if current_key in list_fields:
+                            if "," in value:
+                                parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
+                            else:
+                                parsed[current_key] = [value] if value else []
+                        else:
+                            parsed[current_key] = value
+                    current_key = line[:-1].strip()
+                    current_value = []
+                else:
+                    if current_key:
+                        current_value.append(line)
+
+            if current_key and current_value:
+                value = "\n".join(current_value).strip()
+                if current_key in list_fields:
+                    if "," in value:
+                        parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
+                    else:
+                        parsed[current_key] = [value] if value else []
+                else:
+                    parsed[current_key] = value
+
+            return parsed
+
+        def serialize_value(v):
+            if isinstance(v, (FunctionType, partial)):
+                func_name = getattr(v.func if isinstance(v, partial) else v, "__name__", str(v))
+                return f"<function: {func_name}>"
+            if callable(v) and not isinstance(v, type):
+                return f"<callable: {getattr(v, '__name__', str(v))}>"
+            if isinstance(v, type):
+                return f"<class: {v.__name__}>"
+            if type(v) in (list, tuple):
+                return [serialize_value(item) for item in v]
+            if isinstance(v, dict):
+                return {k: serialize_value(val) for k, val in v.items()}
+            return v
+
+        modules_data = []
+        for module, task_names in module_to_task_names.items():
+            docstring_raw = module_to_docstring.get(module, "")
+            docstring_parsed = parse_docstring(docstring_raw)
+            module_name = getattr(module, "__name__", str(module))
+
+            tasks_in_module = []
+            for task_name in task_names:
+                config = task_configs[task_name]
+                config_dict = asdict(config)
+                config_dict = {k: serialize_value(v) for k, v in config_dict.items()}
+                tasks_in_module.append({"name": task_name, "config": config_dict})
+
+            modules_data.append({"module": module_name, "docstring": docstring_parsed, "tasks": tasks_in_module})
+
+        return modules_data

From c4f655d1a0ea17ee68d9c22d92b55a449009093e Mon Sep 17 00:00:00 2001
From: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
Date: Wed, 12 Nov 2025 16:57:21 +0100
Subject: [PATCH 2/7] registry: reuse TASKS_DIR instead of duplicate TASKS_DIR_SUBDIRS

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/lighteval/tasks/registry.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index 5ce5f1017..be8b8803d 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -452,11 +452,11 @@ def get_tasks_dump(self) -> list[dict]:  # noqa: C901
 
         TASKS_DIR = Path(__file__).parent / "tasks"
         TASKS_DIR_MULTILINGUAL = Path(__file__).parent / "multilingual" / "tasks"
-        TASKS_DIR_SUBDIRS = Path(__file__).parent / "tasks"
+
 
         task_files = [f for f in TASKS_DIR.glob("*.py") if f.name != "__init__.py"]
         task_files_multilingual = [f for f in TASKS_DIR_MULTILINGUAL.glob("*.py") if f.name != "__init__.py"]
-        task_subdirs = [d for d in TASKS_DIR_SUBDIRS.iterdir() if d.is_dir() and (d / "main.py").exists()]
+        task_subdirs = [d for d in TASKS_DIR.iterdir() if d.is_dir() and (d / "main.py").exists()]
 
         module_to_docstring = {}
 

From fa181746c38922fa46a3e563829d40ce89eec8d7 Mon Sep 17 00:00:00 2001
From: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
Date: Wed, 12 Nov 2025 16:57:35 +0100
Subject: [PATCH 3/7] registry: remove unused config_to_module mapping in get_tasks_dump

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/lighteval/tasks/registry.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index be8b8803d..d5cb47b25 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -484,13 +484,11 @@ def get_tasks_dump(self) -> list[dict]:  # noqa: C901
             docstring = (inspect.getdoc(custom_tasks_module) or custom_tasks_module.__doc__ or "").strip()
             module_to_docstring[custom_tasks_module] = docstring
 
-        config_to_module = {}
         module_to_task_names = {}
         for module, docstring in module_to_docstring.items():
             if hasattr(module, "TASKS_TABLE"):
                 task_names_in_module = []
                 for config in getattr(module, "TASKS_TABLE"):
-                    config_to_module[config.name] = module
                     if config.name in task_configs:
                         task_names_in_module.append(config.name)
                 if task_names_in_module:

From d9a005a8132576817ac6c93710398177ed4631a4 Mon Sep 17 00:00:00 2001
From: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
Date: Wed, 12 Nov 2025 16:57:52 +0100
Subject: [PATCH 4/7] registry: factor repeated docstring key/value handling into a helper

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/lighteval/tasks/registry.py | 44 +++++++++++----------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index d5cb47b25..04d36c7c6 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -516,47 +516,33 @@ def parse_docstring(docstring: str) -> dict:  # noqa: C901
 
             list_fields = {"dataset", "languages", "tags"}
 
+            def process_current_key_value(current_key, current_value, list_fields, parsed):
+                if current_key and current_value:
+                    value = "\n".join(current_value).strip()
+                    if current_key in list_fields:
+                        if "," in value:
+                            parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
+                        else:
+                            parsed[current_key] = [value] if value else []
+                    else:
+                        parsed[current_key] = value
+
             for line in lines:
                 line = line.strip()
                 if not line:
-                    if current_key and current_value:
-                        value = "\n".join(current_value).strip()
-                        if current_key in list_fields:
-                            if "," in value:
-                                parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
-                            else:
-                                parsed[current_key] = [value] if value else []
-                        else:
-                            parsed[current_key] = value
-                        current_value = []
+                    process_current_key_value(current_key, current_value, list_fields, parsed)
+                    current_value = []
                     continue
 
                 if line.endswith(":"):
-                    if current_key and current_value:
-                        value = "\n".join(current_value).strip()
-                        if current_key in list_fields:
-                            if "," in value:
-                                parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
-                            else:
-                                parsed[current_key] = [value] if value else []
-                        else:
-                            parsed[current_key] = value
+                    process_current_key_value(current_key, current_value, list_fields, parsed)
                     current_key = line[:-1].strip()
                     current_value = []
                 else:
                     if current_key:
                         current_value.append(line)
 
-            if current_key and current_value:
-                value = "\n".join(current_value).strip()
-                if current_key in list_fields:
-                    if "," in value:
-                        parsed[current_key] = [item.strip() for item in value.split(",") if item.strip()]
-                    else:
-                        parsed[current_key] = [value] if value else []
-                else:
-                    parsed[current_key] = value
-
+            process_current_key_value(current_key, current_value, list_fields, parsed)
             return parsed
 
         def serialize_value(v):

From f4fb2878ec3a8f926d3b50ec33a3fb34be08a58a Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib19@gmail.com>
Date: Wed, 12 Nov 2025 16:58:46 +0100
Subject: [PATCH 5/7] registry: remove extra blank line in get_tasks_dump

---
 src/lighteval/tasks/registry.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index 04d36c7c6..52d23f032 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -453,7 +453,6 @@ def get_tasks_dump(self) -> list[dict]:  # noqa: C901
         TASKS_DIR = Path(__file__).parent / "tasks"
         TASKS_DIR_MULTILINGUAL = Path(__file__).parent / "multilingual" / "tasks"
 
-
         task_files = [f for f in TASKS_DIR.glob("*.py") if f.name != "__init__.py"]
         task_files_multilingual = [f for f in TASKS_DIR_MULTILINGUAL.glob("*.py") if f.name != "__init__.py"]
         task_subdirs = [d for d in TASKS_DIR.iterdir() if d.is_dir() and (d / "main.py").exists()]

From 0a18cb86b49164918edc2b39b081e087e3e12491 Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib19@gmail.com>
Date: Wed, 12 Nov 2025 17:02:20 +0100
Subject: [PATCH 6/7] registry: remove serialize_value and stringify task config values directly

---
 src/lighteval/tasks/registry.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py
index 52d23f032..e7c4e9eb6 100644
--- a/src/lighteval/tasks/registry.py
+++ b/src/lighteval/tasks/registry.py
@@ -31,10 +31,10 @@
 import sys
 import time
 from dataclasses import asdict
-from functools import lru_cache, partial
+from functools import lru_cache
 from itertools import groupby
 from pathlib import Path
-from types import FunctionType, ModuleType
+from types import ModuleType
 
 from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
 
@@ -544,20 +544,6 @@ def process_current_key_value(current_key, current_value, list_fields, parsed):
             process_current_key_value(current_key, current_value, list_fields, parsed)
             return parsed
 
-        def serialize_value(v):
-            if isinstance(v, (FunctionType, partial)):
-                func_name = getattr(v.func if isinstance(v, partial) else v, "__name__", str(v))
-                return f"<function: {func_name}>"
-            if callable(v) and not isinstance(v, type):
-                return f"<callable: {getattr(v, '__name__', str(v))}>"
-            if isinstance(v, type):
-                return f"<class: {v.__name__}>"
-            if type(v) in (list, tuple):
-                return [serialize_value(item) for item in v]
-            if isinstance(v, dict):
-                return {k: serialize_value(val) for k, val in v.items()}
-            return v
-
         modules_data = []
         for module, task_names in module_to_task_names.items():
             docstring_raw = module_to_docstring.get(module, "")
@@ -568,7 +554,7 @@ def serialize_value(v):
             for task_name in task_names:
                 config = task_configs[task_name]
                 config_dict = asdict(config)
-                config_dict = {k: serialize_value(v) for k, v in config_dict.items()}
+                config_dict = {k: str(v) for k, v in config_dict.items()}
                 tasks_in_module.append({"name": task_name, "config": config_dict})
 
             modules_data.append({"module": module_name, "docstring": docstring_parsed, "tasks": tasks_in_module})

From c1366530e344c519c2ade73b475442d9deb7c82f Mon Sep 17 00:00:00 2001
From: Nathan Habib <nathan.habib19@gmail.com>
Date: Wed, 12 Nov 2025 17:09:52 +0100
Subject: [PATCH 7/7] fix aimo docstring: set dataset to lighteval/aimo_progress_prize_1

---
 src/lighteval/tasks/tasks/aimo.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lighteval/tasks/tasks/aimo.py b/src/lighteval/tasks/tasks/aimo.py
index 0bd519dc9..fdfc5ff95 100644
--- a/src/lighteval/tasks/tasks/aimo.py
+++ b/src/lighteval/tasks/tasks/aimo.py
@@ -3,7 +3,7 @@
 AIMO Progress Prize 1
 
 dataset:
-https://www.kaggle.com/competitions/ai-mathematical-olympiad-prize
+lighteval/aimo_progress_prize_1
 
 abstract:
 Task to evaluate LLMs on the training set of the Kaggle AIMO competition: