From 2a7b29a547d10b80370d689d2e487341918f2da1 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Tue, 9 Sep 2025 17:10:19 +0200 Subject: [PATCH 1/7] Rework the imports to be more versatile Style --- docs/source/using-the-python-api.mdx | 4 +- src/lighteval/logging/evaluation_tracker.py | 10 +- src/lighteval/logging/info_loggers.py | 4 +- src/lighteval/main_nanotron.py | 11 +- .../metrics/imports/data_stats_metric.py | 5 +- .../metrics/utils/extractive_match_utils.py | 8 +- .../metrics/utils/linguistic_tokenizers.py | 13 +- src/lighteval/metrics/utils/llm_as_judge.py | 11 +- .../metrics/utils/math_comparison.py | 4 +- .../models/endpoints/litellm_model.py | 5 +- src/lighteval/models/endpoints/tgi_model.py | 7 +- src/lighteval/models/model_loader.py | 26 +-- .../models/nanotron/nanotron_model.py | 4 +- src/lighteval/models/sglang/sglang_model.py | 5 +- .../models/transformers/adapter_model.py | 9 +- .../models/transformers/transformers_model.py | 8 +- .../transformers/vlm_transformers_model.py | 4 +- src/lighteval/models/vllm/vllm_model.py | 6 +- src/lighteval/pipeline.py | 42 ++--- src/lighteval/tasks/extended/__init__.py | 23 +-- .../tasks/extended/ifeval/instructions.py | 6 +- src/lighteval/tasks/extended/ifeval/main.py | 2 + src/lighteval/utils/imports.py | 170 +++++------------- src/lighteval/utils/parallelism.py | 13 +- tests/unit/pipeline/test_reasoning_tags.py | 14 +- tests/unit/utils/test_caching.py | 4 +- tests/utils.py | 4 +- 27 files changed, 144 insertions(+), 278 deletions(-) diff --git a/docs/source/using-the-python-api.mdx b/docs/source/using-the-python-api.mdx index 1a21ebe4b..f09802272 100644 --- a/docs/source/using-the-python-api.mdx +++ b/docs/source/using-the-python-api.mdx @@ -12,9 +12,9 @@ import lighteval from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.models.vllm.vllm_model import VLLMModelConfig from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters -from lighteval.utils.imports import is_accelerate_available +from lighteval.utils.imports import is_package_available -if is_accelerate_available(): +if is_package_available("accelerate"): from datetime import timedelta from accelerate import Accelerator, InitProcessGroupKwargs accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]) diff --git a/src/lighteval/logging/evaluation_tracker.py b/src/lighteval/logging/evaluation_tracker.py index 108877601..aed32d2f1 100644 --- a/src/lighteval/logging/evaluation_tracker.py +++ b/src/lighteval/logging/evaluation_tracker.py @@ -43,13 +43,13 @@ TaskConfigLogger, VersionsLogger, ) -from lighteval.utils.imports import NO_TENSORBOARDX_WARN_MSG, is_nanotron_available, is_tensorboardX_available +from lighteval.utils.imports import is_package_available, not_installed_error_message from lighteval.utils.utils import obj_to_markdown logger = logging.getLogger(__name__) -if is_nanotron_available(): +if is_package_available("nanotron"): from nanotron.config import GeneralArgs # type: ignore try: @@ -659,11 +659,11 @@ def recreate_metadata_card(self, repo_id: str) -> None: # noqa: C901 def push_to_tensorboard( # noqa: C901 self, results: dict[str, dict[str, float]], details: dict[str, DetailsLogger.CompiledDetail] ): - if not is_tensorboardX_available: - logger.warning(NO_TENSORBOARDX_WARN_MSG) + if not is_package_available("tensorboardX"): + logger.warning(not_installed_error_message("tensorboardX")) return - if not is_nanotron_available(): + if not is_package_available("nanotron"): 
logger.warning("You cannot push results to tensorboard without having nanotron installed. Skipping") return diff --git a/src/lighteval/logging/info_loggers.py b/src/lighteval/logging/info_loggers.py index 64019ecf8..4482fabb2 100644 --- a/src/lighteval/logging/info_loggers.py +++ b/src/lighteval/logging/info_loggers.py @@ -34,13 +34,13 @@ from lighteval.models.model_output import ModelResponse from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig from lighteval.tasks.requests import Doc -from lighteval.utils.imports import is_nanotron_available +from lighteval.utils.imports import is_package_available logger = logging.getLogger(__name__) -if is_nanotron_available(): +if is_package_available("nanotron"): pass diff --git a/src/lighteval/main_nanotron.py b/src/lighteval/main_nanotron.py index 1131aea33..b844a74a4 100644 --- a/src/lighteval/main_nanotron.py +++ b/src/lighteval/main_nanotron.py @@ -32,11 +32,13 @@ reasoning_tags, remove_reasoning_tags, ) +from lighteval.utils.imports import requires SEED = 1234 +@requires("nanotron") def nanotron( checkpoint_config_path: Annotated[ str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.") @@ -45,12 +47,9 @@ def nanotron( remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default, reasoning_tags: reasoning_tags.type = reasoning_tags.default, ): - """Evaluate models using nanotron as backend.""" - from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available - - if not is_nanotron_available(): - raise ImportError(NO_NANOTRON_ERROR_MSG) - + """ + Evaluate models using nanotron as backend. + """ from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs, get_config_from_dict, get_config_from_file from lighteval.logging.evaluation_tracker import EvaluationTracker diff --git a/src/lighteval/metrics/imports/data_stats_metric.py b/src/lighteval/metrics/imports/data_stats_metric.py index 883966be4..818c83e0e 100644 --- a/src/lighteval/metrics/imports/data_stats_metric.py +++ b/src/lighteval/metrics/imports/data_stats_metric.py @@ -30,7 +30,7 @@ from typing import Literal from lighteval.metrics.imports.data_stats_utils import Fragments -from lighteval.utils.imports import NO_SPACY_ERROR_MSG, is_spacy_available +from lighteval.utils.imports import raise_if_package_not_available logger = logging.getLogger(__name__) @@ -86,8 +86,7 @@ def __init__( determines the spaCy model used for tokenization. Currently supports English, German, French, and Italian. 
""" - if not is_spacy_available(): - raise ImportError(NO_SPACY_ERROR_MSG) + raise_if_package_not_available("spacy") import spacy self.n_gram = n_gram diff --git a/src/lighteval/metrics/utils/extractive_match_utils.py b/src/lighteval/metrics/utils/extractive_match_utils.py index d16145aea..cce2b1793 100644 --- a/src/lighteval/metrics/utils/extractive_match_utils.py +++ b/src/lighteval/metrics/utils/extractive_match_utils.py @@ -34,12 +34,12 @@ from lighteval.tasks.requests import Doc from lighteval.tasks.templates.utils.formulation import ChoicePrefix, get_prefix from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS -from lighteval.utils.imports import requires_latex2sympy2_extended +from lighteval.utils.imports import requires from lighteval.utils.language import Language from lighteval.utils.timeout import timeout -@requires_latex2sympy2_extended +@requires("latex2sympy2_extended") def latex_normalization_config_default_factory(): from latex2sympy2_extended.latex2sympy2 import NormalizationConfig @@ -373,7 +373,7 @@ def get_target_type_order(target_type: ExtractionTarget) -> int: # Small cache, to catche repeated calls invalid parsing @lru_cache(maxsize=20) -@requires_latex2sympy2_extended +@requires("latex2sympy2_extended") def parse_latex_with_timeout(latex: str, timeout_seconds: int): from latex2sympy2_extended.latex2sympy2 import latex2sympy @@ -428,7 +428,7 @@ def convert_to_pct(number: Number): return sympy.Mul(number, sympy.Rational(1, 100), evaluate=False) -@requires_latex2sympy2_extended +@requires("latex2sympy2_extended") @lru_cache(maxsize=20) def extract_latex( match: re.Match, latex_config: LatexExtractionConfig, timeout_seconds: int diff --git a/src/lighteval/metrics/utils/linguistic_tokenizers.py b/src/lighteval/metrics/utils/linguistic_tokenizers.py index e0dd9ef1a..137ac3417 100644 --- a/src/lighteval/metrics/utils/linguistic_tokenizers.py +++ b/src/lighteval/metrics/utils/linguistic_tokenizers.py @@ -18,10 +18,8 @@ from typing import Callable, Iterator from lighteval.utils.imports import ( - NO_SPACY_TOKENIZER_ERROR_MSG, - NO_STANZA_TOKENIZER_ERROR_MSG, - can_load_spacy_tokenizer, - can_load_stanza_tokenizer, + Extras, + raise_if_package_not_available, ) from lighteval.utils.language import Language @@ -102,8 +100,8 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: class SpaCyTokenizer(WordTokenizer): def __init__(self, spacy_language: str, config=None): super().__init__() - if not can_load_spacy_tokenizer(spacy_language): - raise ImportError(NO_SPACY_TOKENIZER_ERROR_MSG) + raise_if_package_not_available(Extras.MULTILINGUAL, language=spacy_language) + self.spacy_language = spacy_language self.config = config self._tokenizer = None @@ -140,8 +138,7 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: class StanzaTokenizer(WordTokenizer): def __init__(self, stanza_language: str, **stanza_kwargs): super().__init__() - if not can_load_stanza_tokenizer(): - raise ImportError(NO_STANZA_TOKENIZER_ERROR_MSG) + raise_if_package_not_available("stanza") self.stanza_language = stanza_language self.stanza_kwargs = stanza_kwargs self._tokenizer = None diff --git a/src/lighteval/metrics/utils/llm_as_judge.py b/src/lighteval/metrics/utils/llm_as_judge.py index 22da4b3e3..7e1b775c9 100644 --- a/src/lighteval/metrics/utils/llm_as_judge.py +++ b/src/lighteval/metrics/utils/llm_as_judge.py @@ -34,7 +34,7 @@ from tqdm import tqdm from tqdm.asyncio import tqdm_asyncio -from lighteval.utils.imports import is_litellm_available, 
is_openai_available, is_vllm_available +from lighteval.utils.imports import raise_if_package_not_available from lighteval.utils.utils import as_list @@ -151,8 +151,7 @@ def __lazy_load_client(self): # noqa: C901 # Both "openai" and "tgi" backends use the OpenAI-compatible API # They are handled separately to allow for backend-specific validation and setup case "openai" | "tgi": - if not is_openai_available(): - raise RuntimeError("OpenAI backend is not available.") + raise_if_package_not_available("openai") if self.client is None: from openai import OpenAI @@ -162,13 +161,11 @@ def __lazy_load_client(self): # noqa: C901 return self.__call_api_parallel case "litellm": - if not is_litellm_available(): - raise RuntimeError("litellm is not available.") + raise_if_package_not_available("litellm") return self.__call_litellm case "vllm": - if not is_vllm_available(): - raise RuntimeError("vllm is not available.") + raise_if_package_not_available("vllm") if self.pipe is None: from vllm import LLM, SamplingParams from vllm.transformers_utils.tokenizer import get_tokenizer diff --git a/src/lighteval/metrics/utils/math_comparison.py b/src/lighteval/metrics/utils/math_comparison.py index 2650ee335..2329acfe0 100644 --- a/src/lighteval/metrics/utils/math_comparison.py +++ b/src/lighteval/metrics/utils/math_comparison.py @@ -51,7 +51,7 @@ from sympy.core.function import UndefinedFunction from sympy.core.relational import Relational -from lighteval.utils.imports import requires_latex2sympy2_extended +from lighteval.utils.imports import requires from lighteval.utils.timeout import timeout @@ -308,7 +308,7 @@ def is_equation(expr: Basic | MatrixBase) -> bool: return False -@requires_latex2sympy2_extended +@requires("latex2sympy2_extended") def is_assignment_relation(expr: Basic | MatrixBase) -> bool: from latex2sympy2_extended.latex2sympy2 import is_expr_of_only_symbols diff --git a/src/lighteval/models/endpoints/litellm_model.py b/src/lighteval/models/endpoints/litellm_model.py index 00f7c8779..e620fba70 100644 --- a/src/lighteval/models/endpoints/litellm_model.py +++ b/src/lighteval/models/endpoints/litellm_model.py @@ -32,12 +32,12 @@ from lighteval.tasks.prompt_manager import PromptManager from lighteval.tasks.requests import Doc, SamplingMethod from lighteval.utils.cache_management import SampleCache, cached -from lighteval.utils.imports import is_litellm_available +from lighteval.utils.imports import is_package_available, requires logger = logging.getLogger(__name__) -if is_litellm_available(): +if is_package_available("litellm"): import litellm from litellm import encode from litellm.caching.caching import Cache @@ -110,6 +110,7 @@ class LiteLLMModelConfig(ModelConfig): concurrent_requests: int = 10 +@requires("litellm") class LiteLLMClient(LightevalModel): _DEFAULT_MAX_LENGTH: int = 4096 diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 8130cba88..4b4847fe9 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -32,10 +32,10 @@ from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel from lighteval.tasks.prompt_manager import PromptManager from lighteval.utils.cache_management import SampleCache -from lighteval.utils.imports import NO_TGI_ERROR_MSG, is_tgi_available +from lighteval.utils.imports import is_package_available, requires -if is_tgi_available(): +if is_package_available("tgi"): from text_generation import AsyncClient else: from unittest.mock import Mock @@ 
-99,12 +99,11 @@ class TGIModelConfig(ModelConfig): # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite # the client functions, since they use a different client. +@requires("tgi") class ModelClient(InferenceEndpointModel): _DEFAULT_MAX_LENGTH: int = 4096 def __init__(self, config: TGIModelConfig) -> None: - if not is_tgi_available(): - raise ImportError(NO_TGI_ERROR_MSG) headers = ( {} if config.inference_server_auth is None else {"Authorization": f"Bearer {config.inference_server_auth}"} ) diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py index ccae20a5f..46129960d 100644 --- a/src/lighteval/models/model_loader.py +++ b/src/lighteval/models/model_loader.py @@ -43,16 +43,7 @@ from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModel, VLMTransformersModelConfig from lighteval.models.vllm.vllm_model import AsyncVLLMModel, VLLMModel, VLLMModelConfig -from lighteval.utils.imports import ( - NO_LITELLM_ERROR_MSG, - NO_SGLANG_ERROR_MSG, - NO_TGI_ERROR_MSG, - NO_VLLM_ERROR_MSG, - is_litellm_available, - is_sglang_available, - is_tgi_available, - is_vllm_available, -) +from lighteval.utils.imports import raise_if_package_not_available, requires logger = logging.getLogger(__name__) @@ -101,19 +92,15 @@ def load_model( # noqa: C901 return load_inference_providers_model(config=config) +@requires("tgi") def load_model_with_tgi(config: TGIModelConfig): - if not is_tgi_available(): - raise ImportError(NO_TGI_ERROR_MSG) - logger.info(f"Load model from inference server: {config.inference_server_address}") model = ModelClient(config=config) return model +@requires("litellm") def load_litellm_model(config: LiteLLMModelConfig): - if not is_litellm_available(): - raise ImportError(NO_LITELLM_ERROR_MSG) - model = LiteLLMClient(config) return model @@ -163,8 +150,7 @@ def load_model_with_accelerate_or_default( elif isinstance(config, DeltaModelConfig): model = DeltaModel(config=config) elif isinstance(config, VLLMModelConfig): - if not is_vllm_available(): - raise ImportError(NO_VLLM_ERROR_MSG) + raise_if_package_not_available("vllm") if config.is_async: model = AsyncVLLMModel(config=config) else: @@ -185,8 +171,6 @@ def load_inference_providers_model(config: InferenceProvidersModelConfig): return InferenceProvidersClient(config=config) +@requires("sglang") def load_sglang_model(config: SGLangModelConfig): - if not is_sglang_available(): - raise ImportError(NO_SGLANG_ERROR_MSG) - return SGLangModel(config=config) diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index 310843d32..7ed6d35eb 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -51,7 +51,7 @@ SamplingMethod, ) from lighteval.utils.cache_management import SampleCache, cached -from lighteval.utils.imports import is_nanotron_available +from lighteval.utils.imports import is_package_available from lighteval.utils.parallelism import find_executable_batch_size from lighteval.utils.utils import as_list @@ -63,7 +63,7 @@ TokenSequence = Union[List[int], torch.LongTensor, torch.Tensor, BatchEncoding] -if is_nanotron_available(): +if is_package_available("nanotron"): from nanotron import distributed as dist from nanotron import logging from nanotron.config import GeneralArgs, ModelArgs, 
TokenizerArgs diff --git a/src/lighteval/models/sglang/sglang_model.py b/src/lighteval/models/sglang/sglang_model.py index fe37c64f9..220d5159b 100644 --- a/src/lighteval/models/sglang/sglang_model.py +++ b/src/lighteval/models/sglang/sglang_model.py @@ -35,12 +35,12 @@ from lighteval.tasks.prompt_manager import PromptManager from lighteval.tasks.requests import Doc, SamplingMethod from lighteval.utils.cache_management import SampleCache, cached -from lighteval.utils.imports import is_sglang_available +from lighteval.utils.imports import is_package_available, requires logger = logging.getLogger(__name__) -if is_sglang_available(): +if is_package_available("sglang"): from sglang import Engine from sglang.srt.hf_transformers_utils import get_tokenizer @@ -138,6 +138,7 @@ class SGLangModelConfig(ModelConfig): override_chat_template: bool = None +@requires("sglang") class SGLangModel(LightevalModel): def __init__( self, diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index a868ad20f..52f339664 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -30,15 +30,16 @@ from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.utils import _get_dtype -from lighteval.utils.imports import NO_PEFT_ERROR_MSG, is_peft_available +from lighteval.utils.imports import is_package_available, requires logger = logging.getLogger(__name__) -if is_peft_available(): +if is_package_available("peft"): from peft import PeftModel +@requires("peft") class AdapterModelConfig(TransformersModelConfig): """Configuration class for PEFT (Parameter-Efficient Fine-Tuning) adapter models. 
@@ -58,10 +59,6 @@ class AdapterModelConfig(TransformersModelConfig): base_model: str - def model_post_init(self, __context): - if not is_peft_available(): - raise ImportError(NO_PEFT_ERROR_MSG) - class AdapterModel(TransformersModel): def _create_auto_model(self) -> transformers.PreTrainedModel: diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 0fb6df464..ed97faf84 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -55,7 +55,7 @@ from lighteval.tasks.requests import Doc, SamplingMethod from lighteval.utils.cache_management import SampleCache, cached from lighteval.utils.imports import ( - is_accelerate_available, + is_package_available, ) from lighteval.utils.parallelism import find_executable_batch_size @@ -227,7 +227,7 @@ def __init__( self.model_name = _simplify_name(config.model_name) - if is_accelerate_available(): + if is_package_available("accelerate"): model_size, _ = calculate_maximum_sizes(self.model) model_size = convert_bytes(model_size) else: @@ -290,7 +290,7 @@ def from_model( else: self._device = self.config.device - if is_accelerate_available(): + if is_package_available("accelerate"): model_size, _ = calculate_maximum_sizes(self.model) model_size = convert_bytes(model_size) else: @@ -331,7 +331,7 @@ def disable_tqdm(self) -> bool: def init_model_parallel(self, model_parallel: bool | None = None) -> Tuple[bool, Optional[dict], Optional[str]]: """Compute all the parameters related to model_parallel""" - if not is_accelerate_available(): + if not is_package_available("accelerate"): return False, None, None self.num_local_processes = int(os.environ.get("LOCAL_WORLD_SIZE", 1)) diff --git a/src/lighteval/models/transformers/vlm_transformers_model.py b/src/lighteval/models/transformers/vlm_transformers_model.py index 3da1290be..0697ab729 100644 --- a/src/lighteval/models/transformers/vlm_transformers_model.py +++ b/src/lighteval/models/transformers/vlm_transformers_model.py @@ -47,7 +47,7 @@ from lighteval.tasks.requests import Doc, SamplingMethod from lighteval.utils.cache_management import SampleCache, cached from lighteval.utils.imports import ( - is_accelerate_available, + is_package_available, ) @@ -210,7 +210,7 @@ def disable_tqdm(self) -> bool: # Copied from ./transformers_model.py def init_model_parallel(self, model_parallel: bool | None = None) -> Tuple[bool, Optional[dict], Optional[str]]: """Compute all the parameters related to model_parallel""" - if not is_accelerate_available(): + if not is_package_available("accelerate"): return False, None, None self.num_local_processes = int(os.environ.get("LOCAL_WORLD_SIZE", 1)) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 68632410a..f0a9123e9 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -38,13 +38,13 @@ from lighteval.tasks.prompt_manager import PromptManager from lighteval.tasks.requests import Doc, SamplingMethod from lighteval.utils.cache_management import SampleCache, cached -from lighteval.utils.imports import is_vllm_available +from lighteval.utils.imports import is_package_available, requires logger = logging.getLogger(__name__) -if is_vllm_available(): +if is_package_available("vllm"): import ray from more_itertools import distribute from vllm import LLM, RequestOutput, SamplingParams @@ -179,6 +179,7 @@ class VLLMModelConfig(ModelConfig): 
override_chat_template: bool = None +@requires("vllm") class VLLMModel(LightevalModel): def __init__( self, @@ -531,6 +532,7 @@ def loglikelihood_rolling(self, docs: list[Doc]) -> list[ModelResponse]: raise NotImplementedError() +@requires("vllm") class AsyncVLLMModel(VLLMModel): """VLLM models which deploy async natively (no ray). Supports DP and PP/TP but not batch size > 1""" diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index f79e8c910..1416ce7c8 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -42,31 +42,19 @@ from lighteval.tasks.lighteval_task import LightevalTask from lighteval.tasks.registry import Registry from lighteval.tasks.requests import SamplingMethod -from lighteval.utils.imports import ( - NO_ACCELERATE_ERROR_MSG, - NO_NANOTRON_ERROR_MSG, - NO_OPENAI_ERROR_MSG, - NO_SGLANG_ERROR_MSG, - NO_TGI_ERROR_MSG, - NO_VLLM_ERROR_MSG, - is_accelerate_available, - is_nanotron_available, - is_openai_available, - is_sglang_available, - is_tgi_available, - is_vllm_available, -) +from lighteval.utils.imports import is_package_available, raise_if_package_not_available from lighteval.utils.parallelism import test_all_gather from lighteval.utils.utils import make_results_table, remove_reasoning_tags -if is_accelerate_available(): +if is_package_available("accelerate"): from accelerate import Accelerator, InitProcessGroupKwargs else: from unittest.mock import Mock Accelerator = InitProcessGroupKwargs = Mock() -if is_nanotron_available(): + +if is_package_available("nanotron"): from nanotron import distributed as dist from nanotron.parallel.context import ParallelContext @@ -110,23 +98,17 @@ class PipelineParameters: def __post_init__(self): # noqa C901 # Import testing if self.launcher_type == ParallelismManager.ACCELERATE: - if not is_accelerate_available(): - raise ImportError(NO_ACCELERATE_ERROR_MSG) + raise_if_package_not_available("accelerate") elif self.launcher_type == ParallelismManager.VLLM: - if not is_vllm_available(): - raise ImportError(NO_VLLM_ERROR_MSG) + raise_if_package_not_available("vllm") elif self.launcher_type == ParallelismManager.SGLANG: - if not is_sglang_available(): - raise ImportError(NO_SGLANG_ERROR_MSG) + raise_if_package_not_available("sglang") elif self.launcher_type == ParallelismManager.TGI: - if not is_tgi_available(): - raise ImportError(NO_TGI_ERROR_MSG) + raise_if_package_not_available("tgi") elif self.launcher_type == ParallelismManager.NANOTRON: - if not is_nanotron_available(): - raise ImportError(NO_NANOTRON_ERROR_MSG) + raise_if_package_not_available("nanotron") elif self.launcher_type == ParallelismManager.OPENAI: - if not is_openai_available(): - raise ImportError(NO_OPENAI_ERROR_MSG) + raise_if_package_not_available("openai") # Convert reasoning tags to list if needed if not isinstance(self.reasoning_tags, list): @@ -189,12 +171,12 @@ def __init__( def _init_parallelism_manager(self): accelerator, parallel_context = None, None if self.launcher_type == ParallelismManager.ACCELERATE: - if not is_accelerate_available(): + if not is_package_available("accelerate"): raise ValueError("You are trying to launch an accelerate model, but accelerate is not installed") accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]) test_all_gather(accelerator=accelerator) elif self.launcher_type == ParallelismManager.NANOTRON: - if not is_nanotron_available(): + if not is_package_available("nanotron"): raise ValueError("You are trying to launch a nanotron model, but nanotron 
is not installed") dist.initialize_torch_distributed() parallel_context = ParallelContext( diff --git a/src/lighteval/tasks/extended/__init__.py b/src/lighteval/tasks/extended/__init__.py index cd60bdb1b..247a0c3a2 100644 --- a/src/lighteval/tasks/extended/__init__.py +++ b/src/lighteval/tasks/extended/__init__.py @@ -20,20 +20,15 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -from lighteval.utils.imports import can_load_extended_tasks +import lighteval.tasks.extended.hle.main as hle +import lighteval.tasks.extended.ifbench.main as ifbench +import lighteval.tasks.extended.ifeval.main as ifeval +import lighteval.tasks.extended.lcb.main as lcb +import lighteval.tasks.extended.mix_eval.main as mix_eval +import lighteval.tasks.extended.mt_bench.main as mt_bench +import lighteval.tasks.extended.olympiade_bench.main as olympiad_bench +import lighteval.tasks.extended.tiny_benchmarks.main as tiny_benchmarks -if can_load_extended_tasks(): - import lighteval.tasks.extended.hle.main as hle - import lighteval.tasks.extended.ifbench.main as ifbench - import lighteval.tasks.extended.ifeval.main as ifeval - import lighteval.tasks.extended.lcb.main as lcb - import lighteval.tasks.extended.mix_eval.main as mix_eval - import lighteval.tasks.extended.mt_bench.main as mt_bench - import lighteval.tasks.extended.olympiade_bench.main as olympiad_bench - import lighteval.tasks.extended.tiny_benchmarks.main as tiny_benchmarks - AVAILABLE_EXTENDED_TASKS_MODULES = [ifeval, ifbench, tiny_benchmarks, mt_bench, mix_eval, olympiad_bench, hle, lcb] - -else: - AVAILABLE_EXTENDED_TASKS_MODULES = [] +AVAILABLE_EXTENDED_TASKS_MODULES = [ifeval, ifbench, tiny_benchmarks, mt_bench, mix_eval, olympiad_bench, hle, lcb] diff --git a/src/lighteval/tasks/extended/ifeval/instructions.py b/src/lighteval/tasks/extended/ifeval/instructions.py index 4022e640f..806125485 100644 --- a/src/lighteval/tasks/extended/ifeval/instructions.py +++ b/src/lighteval/tasks/extended/ifeval/instructions.py @@ -21,7 +21,11 @@ import re import string -import langdetect +from ....utils.imports import is_package_available + + +if is_package_available("langdetect"): + import langdetect import lighteval.tasks.extended.ifeval.instructions_utils as instructions_util diff --git a/src/lighteval/tasks/extended/ifeval/main.py b/src/lighteval/tasks/extended/ifeval/main.py index 1f63f91f1..bde8ae709 100644 --- a/src/lighteval/tasks/extended/ifeval/main.py +++ b/src/lighteval/tasks/extended/ifeval/main.py @@ -31,9 +31,11 @@ from lighteval.models.model_output import ModelResponse from lighteval.tasks.lighteval_task import LightevalTaskConfig from lighteval.tasks.requests import Doc, SamplingMethod +from lighteval.utils.imports import requires # Very specific task where there are no precise outputs but instead we test if the format obeys rules +@requires("langdetect") def ifeval_prompt(line, task_name: str = ""): return Doc( task_name=task_name, diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index 2534cb52a..f9ac5598a 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -11,117 +11,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import enum import importlib +from functools import lru_cache -def is_accelerate_available() -> bool: - return importlib.util.find_spec("accelerate") is not None - - -NO_ACCELERATE_ERROR_MSG = "You requested the use of accelerate for this evaluation, but it is not available in your current environement. Please install it using pip." - - -def is_tgi_available() -> bool: - return importlib.util.find_spec("text_generation") is not None - - -NO_TGI_ERROR_MSG = "You are trying to start a text generation inference endpoint, but text-generation is not present in your local environement. Please install it using pip." - - -def is_nanotron_available() -> bool: - return importlib.util.find_spec("nanotron") is not None - - -NO_NANOTRON_ERROR_MSG = "You requested the use of nanotron for this evaluation, but it is not available in your current environement. Please install it using pip." - - -def is_optimum_available() -> bool: - return importlib.util.find_spec("optimum") is not None - - -def is_bnb_available() -> bool: - return importlib.util.find_spec("bitsandbytes") is not None - - -NO_BNB_ERROR_MSG = "You are trying to load a model quantized with `bitsandbytes`, which is not available in your local environement. Please install it using pip." - - -def is_autogptq_available() -> bool: - return importlib.util.find_spec("auto_gptq") is not None - - -NO_AUTOGPTQ_ERROR_MSG = "You are trying to load a model quantized with `auto-gptq`, which is not available in your local environement. Please install it using pip." - - -def is_peft_available() -> bool: - return importlib.util.find_spec("peft") is not None - - -NO_PEFT_ERROR_MSG = "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip." - - -def is_tensorboardX_available() -> bool: - return importlib.util.find_spec("tensorboardX") is not None - - -NO_TENSORBOARDX_WARN_MSG = ( - "You are trying to log using tensorboardX, which is not installed. Please install it using pip. Skipping." -) - - -def is_openai_available() -> bool: - return importlib.util.find_spec("openai") is not None - +class Extras(enum.Enum): + MULTILINGUAL = "multilingual" + EXTENDED = "extended" -NO_OPENAI_ERROR_MSG = "You are trying to use an Open AI LLM as a judge, for which you need `openai`, which is not available in your environment. Please install it using pip." +@lru_cache() +def is_package_available(package_name: str): + if package_name == Extras.MULTILINGUAL: + return all(importlib.util.find_spec(package) is not None for package in ["stanza", "spacy", "langcodes"]) + if package_name == Extras.EXTENDED: + return all(importlib.util.find_spec(package) is not None for package in ["spacy"]) + else: + return importlib.util.find_spec(package_name) is not None -def is_litellm_available() -> bool: - return importlib.util.find_spec("litellm") is not None - -NO_LITELLM_ERROR_MSG = "You are trying to use a LiteLLM model, for which you need `litellm`, which is not available in your environment. Please install it using pip." - - -def is_vllm_available() -> bool: - return importlib.util.find_spec("vllm") is not None and importlib.util.find_spec("ray") is not None - - -NO_VLLM_ERROR_MSG = "You are trying to use an VLLM model, for which you need `vllm` and `ray`, which are not available in your environment. Please install them using pip, `pip install vllm ray`." 
- - -def is_sglang_available() -> bool: - return importlib.util.find_spec("sglang") is not None and importlib.util.find_spec("flashinfer") is not None - - -NO_SGLANG_ERROR_MSG = "You are trying to use an sglang model, for which you need `sglang` and `flashinfer`, which are not available in your environment. Please install them using pip, `pip install vllm ray`." - - -def can_load_extended_tasks() -> bool: - imports = [] - for package in ["langdetect", "openai"]: - imports.append(importlib.util.find_spec(package)) - - return all(cur_import is not None for cur_import in imports) - - -CANNOT_USE_EXTENDED_TASKS_MSG = "If you want to use extended_tasks, make sure you installed their dependencies using `pip install -e .[extended_tasks]`." - - -def can_load_multilingual_tasks() -> bool: - try: - import lighteval.tasks.multilingual.tasks # noqa: F401 - - return True - except ImportError: - return False - - -CANNOT_USE_MULTILINGUAL_TASKS_MSG = "If you want to use multilingual tasks, make sure you installed their dependencies using `pip install -e .[multilingual]`." - - -def can_load_spacy_tokenizer(language: str) -> bool: +@lru_cache() +def is_multilingual_package_available(language: str): imports = [] packages = ["spacy", "stanza"] if language == "vi": @@ -131,38 +42,41 @@ def can_load_spacy_tokenizer(language: str) -> bool: for package in packages: imports.append(importlib.util.find_spec(package)) - return all(cur_import is not None for cur_import in imports) - - -NO_SPACY_TOKENIZER_ERROR_MSG = "You are trying to load a spacy tokenizer, for which you need `spacy` and its dependencies, which are not available in your environment. Please install them using `pip install lighteval[multilingual]`." - -def can_load_stanza_tokenizer() -> bool: - return importlib.util.find_spec("stanza") is not None - - -NO_STANZA_TOKENIZER_ERROR_MSG = "You are trying to load a stanza tokenizer, for which you need `stanza`, which is not available in your environment. Please install it using `pip install lighteval[multilingual]`." + return all(cur_import is not None for cur_import in imports) -# Better than having to check import every time -def requires_latex2sympy2_extended(func): - checked_import = False +def raise_if_package_not_available(package_name: str | Extras, *, language: str = None): + if package_name == Extras.MULTILINGUAL and not is_multilingual_package_available(language): + raise ImportError(not_installed_error_message(package_name)) - def wrapper(*args, **kwargs): - nonlocal checked_import - if not checked_import and importlib.util.find_spec("latex2sympy2_extended") is None: - raise ImportError(NO_LATEX2SYMPY2_EXTENDED_ERROR_MSG) - checked_import = True - return func(*args, **kwargs) + if not is_package_available(package_name): + raise ImportError(not_installed_error_message(package_name)) - return wrapper +def not_installed_error_message(package_name: str | Extras) -> str: + if package_name == Extras.MULTILINGUAL: + return "You are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`" + elif package_name == Extras.EXTENDED: + return "You are trying to run an evaluation requiring additional extensions. Please install the required extra: `pip install lighteval[extended] " + elif package_name == "text_generation": + return "You are trying to start a text generation inference endpoint, but TGI is not present in your local environement. Please install it using pip." 
+ elif package_name in ["bitsandbytes", "auto-gptq"]: + return f"You are trying to load a model quantized with `{package_name}`, which is not available in your local environement. Please install it using pip." + elif package_name == "peft": + return "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip." + elif package_name == "openai": + return "You are trying to use an Open AI LLM as a judge, for which you need `openai`, which is not available in your environment. Please install it using pip." -NO_LATEX2SYMPY2_EXTENDED_ERROR_MSG = "You are trying to parse latex expressions, for which you need `latex2sympy2_extended`, which is not available in your environment. Please install it using `pip install lighteval[math]`." + return f"You requested the use of `{package_name}` for this evaluation, but it is not available in your current environement. Please install it using pip." -def is_spacy_available() -> bool: - return importlib.util.find_spec("spacy") is not None +def requires(package_name): + def decorator(func): + def wrapper(*args, **kwargs): + raise_if_package_not_available(package_name) + return func(*args, **kwargs) + return wrapper -NO_SPACY_ERROR_MSG = "You are trying to use some metrics requiring `spacy`, which is not available in your environment. Please install it using pip." + return decorator diff --git a/src/lighteval/utils/parallelism.py b/src/lighteval/utils/parallelism.py index 2e73f4c73..896183160 100644 --- a/src/lighteval/utils/parallelism.py +++ b/src/lighteval/utils/parallelism.py @@ -27,12 +27,7 @@ import torch -from lighteval.utils.imports import ( - NO_ACCELERATE_ERROR_MSG, - NO_NANOTRON_ERROR_MSG, - is_accelerate_available, - is_nanotron_available, -) +from lighteval.utils.imports import raise_if_package_not_available logger = logging.getLogger(__name__) @@ -131,16 +126,14 @@ def test_all_gather(accelerator=None, parallel_context=None): ImportError: If the required accelerator or parallel context is not available. 
""" if accelerator: - if not is_accelerate_available(): - raise ImportError(NO_ACCELERATE_ERROR_MSG) + raise_if_package_not_available("accelerate") logger.info("Test gather tensor") test_tensor: torch.Tensor = torch.tensor([accelerator.process_index], device=accelerator.device) gathered_tensor: torch.Tensor = accelerator.gather(test_tensor) logger.info(f"gathered_tensor {gathered_tensor}, should be {list(range(accelerator.num_processes))}") accelerator.wait_for_everyone() elif parallel_context: - if not is_nanotron_available(): - raise ImportError(NO_NANOTRON_ERROR_MSG) + raise_if_package_not_available("nanotron") from nanotron import distributed as dist from nanotron import logging diff --git a/tests/unit/pipeline/test_reasoning_tags.py b/tests/unit/pipeline/test_reasoning_tags.py index 84dfb9e7e..f772970c4 100644 --- a/tests/unit/pipeline/test_reasoning_tags.py +++ b/tests/unit/pipeline/test_reasoning_tags.py @@ -35,7 +35,7 @@ from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig from lighteval.tasks.registry import Registry from lighteval.tasks.requests import Doc, SamplingMethod -from lighteval.utils.imports import is_accelerate_available +from lighteval.utils.imports import is_package_available class TestPipelineReasoningTags(unittest.TestCase): @@ -129,7 +129,7 @@ def test_remove_reasoning_tags_enabled(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() @@ -175,7 +175,7 @@ def test_remove_reasoning_tags_enabled_tags_as_string(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() @@ -221,7 +221,7 @@ def test_remove_reasoning_tags_enabled_default_tags(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() @@ -264,7 +264,7 @@ def test_remove_reasoning_tags_disabled(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() @@ -310,7 +310,7 @@ def test_custom_reasoning_tags(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() @@ -356,7 +356,7 @@ def test_multiple_reasoning_tags(self): ) # Initialize accelerator if available - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() diff --git a/tests/unit/utils/test_caching.py b/tests/unit/utils/test_caching.py index 47ea599ae..1fc3595e0 100644 --- a/tests/unit/utils/test_caching.py +++ b/tests/unit/utils/test_caching.py @@ -237,9 +237,9 @@ def test_cache_vllm(self, mock_create_model, mock_greedy_until, mock_loglikeliho @patch("lighteval.models.endpoints.tgi_model.ModelClient._loglikelihood") def test_cache_tgi(self, mock_loglikelihood, mock_greedy_until, mock_requests_get): from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig - from lighteval.utils.imports import is_tgi_available + from lighteval.utils.imports import is_package_available - if not is_tgi_available(): + if not is_package_available("tgi"): pytest.skip("Skipping because missing the imports") # Mock TGI requests diff --git a/tests/utils.py b/tests/utils.py index 7954b3531..3b68dd631 100644 --- 
a/tests/utils.py +++ b/tests/utils.py @@ -35,7 +35,7 @@ from lighteval.tasks.registry import Registry from lighteval.tasks.requests import Doc from lighteval.utils.cache_management import SampleCache -from lighteval.utils.imports import is_accelerate_available +from lighteval.utils.imports import is_package_available class FakeModelConfig(ModelConfig): @@ -117,7 +117,7 @@ def load_tasks(self): # This is due to logger complaining we have no initialised the accelerator # It's hard to mock as it's global singleton - if is_accelerate_available(): + if is_package_available("accelerate"): from accelerate import Accelerator Accelerator() From bd04ddb80441384f1f1f686e6c75604cdea27a38 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Tue, 16 Sep 2025 13:53:35 +0200 Subject: [PATCH 2/7] v2 --- pyproject.toml | 1 + src/lighteval/metrics/normalizations.py | 2 +- src/lighteval/models/model_loader.py | 5 +- .../tasks/extended/ifeval/instructions.py | 2 +- src/lighteval/tasks/extended/ifeval/main.py | 1 + src/lighteval/tasks/registry.py | 25 +--- src/lighteval/utils/imports.py | 112 ++++++++++++++++-- src/lighteval/utils/parallelism.py | 3 - 8 files changed, 108 insertions(+), 43 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 75fff7cd6..64643f2fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,6 +84,7 @@ dependencies = [ "fsspec>=2023.12.2", "httpx>=0.27.2", "latex2sympy2_extended==1.0.6", + "langcodes" ] [project.optional-dependencies] diff --git a/src/lighteval/metrics/normalizations.py b/src/lighteval/metrics/normalizations.py index 925448f17..98d22b70b 100644 --- a/src/lighteval/metrics/normalizations.py +++ b/src/lighteval/metrics/normalizations.py @@ -450,9 +450,9 @@ def get_multilingual_normalizer(lang: Language, lower: bool = True) -> Callable[ Returns: Callable[[str], str]: A function that normalizes text for the specified language """ - tokenizer = get_word_tokenizer(lang) def _inner_normalizer(text: str) -> str: + tokenizer = get_word_tokenizer(lang) text = remove_articles(text, lang) text = remove_punc(text) if lower: diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py index 46129960d..fce7ff1af 100644 --- a/src/lighteval/models/model_loader.py +++ b/src/lighteval/models/model_loader.py @@ -43,7 +43,7 @@ from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModel, VLMTransformersModelConfig from lighteval.models.vllm.vllm_model import AsyncVLLMModel, VLLMModel, VLLMModelConfig -from lighteval.utils.imports import raise_if_package_not_available, requires +from lighteval.utils.imports import raise_if_package_not_available logger = logging.getLogger(__name__) @@ -92,14 +92,12 @@ def load_model( # noqa: C901 return load_inference_providers_model(config=config) -@requires("tgi") def load_model_with_tgi(config: TGIModelConfig): logger.info(f"Load model from inference server: {config.inference_server_address}") model = ModelClient(config=config) return model -@requires("litellm") def load_litellm_model(config: LiteLLMModelConfig): model = LiteLLMClient(config) return model @@ -171,6 +169,5 @@ def load_inference_providers_model(config: InferenceProvidersModelConfig): return InferenceProvidersClient(config=config) -@requires("sglang") def load_sglang_model(config: SGLangModelConfig): return SGLangModel(config=config) diff --git a/src/lighteval/tasks/extended/ifeval/instructions.py 
b/src/lighteval/tasks/extended/ifeval/instructions.py index 806125485..06b7cf85c 100644 --- a/src/lighteval/tasks/extended/ifeval/instructions.py +++ b/src/lighteval/tasks/extended/ifeval/instructions.py @@ -21,7 +21,7 @@ import re import string -from ....utils.imports import is_package_available +from lighteval.utils.imports import is_package_available if is_package_available("langdetect"): diff --git a/src/lighteval/tasks/extended/ifeval/main.py b/src/lighteval/tasks/extended/ifeval/main.py index bde8ae709..ae7d42809 100644 --- a/src/lighteval/tasks/extended/ifeval/main.py +++ b/src/lighteval/tasks/extended/ifeval/main.py @@ -125,6 +125,7 @@ def compute(self, doc: Doc, model_response: ModelResponse, **kwargs) -> dict: } +@requires("langdetect") def agg_inst_level_acc(items): flat_items = [item for sublist in items for item in sublist] inst_level_acc = sum(flat_items) / len(flat_items) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index 01125c778..51e0ba377 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -36,12 +36,6 @@ import lighteval.tasks.default_tasks as default_tasks from lighteval.tasks.extended import AVAILABLE_EXTENDED_TASKS_MODULES from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig -from lighteval.utils.imports import ( - CANNOT_USE_EXTENDED_TASKS_MSG, - CANNOT_USE_MULTILINGUAL_TASKS_MSG, - can_load_extended_tasks, - can_load_multilingual_tasks, -) # Import community tasks @@ -121,9 +115,9 @@ def __init__( self, tasks: str | Path | None = None, custom_tasks: str | Path | ModuleType | None = None, - load_community: bool = False, - load_extended: bool = False, - load_multilingual: bool = False, + load_community: bool = True, + load_extended: bool = True, + load_multilingual: bool = True, ): """ Initialize the Registry class. @@ -219,17 +213,6 @@ def _activate_loading_of_optional_suite(self) -> None: f"Suite {suite_name} unknown. This is not normal, unless you are testing adding new evaluations." ) - if "extended" in suites: - if not can_load_extended_tasks(): - raise ImportError(CANNOT_USE_EXTENDED_TASKS_MSG) - self._load_extended = True - if "multilingual" in suites: - if not can_load_multilingual_tasks(): - raise ImportError(CANNOT_USE_MULTILINGUAL_TASKS_MSG) - self._load_multilingual = True - if "community" in suites: - self._load_community = True - def _load_full_registry(self) -> dict[str, LightevalTaskConfig]: """ Returns: @@ -251,8 +234,6 @@ def _load_full_registry(self) -> dict[str, LightevalTaskConfig]: if self._load_extended: for extended_task_module in AVAILABLE_EXTENDED_TASKS_MODULES: custom_tasks_module.append(extended_task_module) - else: - logger.warning(CANNOT_USE_EXTENDED_TASKS_MSG) # Need to load community tasks if self._load_community: diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index f9ac5598a..f312dccba 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -12,8 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import enum +import functools import importlib +import inspect +import operator +import re +from enum import Enum from functools import lru_cache +from typing import Callable + +from packaging.requirements import Requirement +from packaging.version import Version class Extras(enum.Enum): @@ -22,9 +31,9 @@ class Extras(enum.Enum): @lru_cache() -def is_package_available(package_name: str): +def is_package_available(package_name: str | Extras): if package_name == Extras.MULTILINGUAL: - return all(importlib.util.find_spec(package) is not None for package in ["stanza", "spacy", "langcodes"]) + return all(importlib.util.find_spec(package) is not None for package in ["stanza", "spacy"]) if package_name == Extras.EXTENDED: return all(importlib.util.find_spec(package) is not None for package in ["spacy"]) else: @@ -46,12 +55,14 @@ def is_multilingual_package_available(language: str): return all(cur_import is not None for cur_import in imports) -def raise_if_package_not_available(package_name: str | Extras, *, language: str = None): +def raise_if_package_not_available(package_name: str | Extras, *, language: str = None, object_name: str = None): + prefix = "You" if object_name is None else f"Through the use of {object_name}, you" + if package_name == Extras.MULTILINGUAL and not is_multilingual_package_available(language): - raise ImportError(not_installed_error_message(package_name)) + raise ImportError(prefix + not_installed_error_message(package_name)[3:]) if not is_package_available(package_name): - raise ImportError(not_installed_error_message(package_name)) + raise ImportError(prefix + not_installed_error_message(package_name)[3:]) def not_installed_error_message(package_name: str | Extras) -> str: @@ -71,12 +82,89 @@ def not_installed_error_message(package_name: str | Extras) -> str: return f"You requested the use of `{package_name}` for this evaluation, but it is not available in your current environement. Please install it using pip." -def requires(package_name): - def decorator(func): - def wrapper(*args, **kwargs): - raise_if_package_not_available(package_name) - return func(*args, **kwargs) +class DummyObject(type): + """ + Metaclass for the dummy objects. Any class inheriting from it will return the ImportError generated by + `requires_backend` each time a user tries to access any method of that class. 
+ """ + + is_dummy = True + + def __getattribute__(cls, key): + if (key.startswith("_") and key != "_from_config") or key == "is_dummy" or key == "mro" or key == "call": + return super().__getattribute__(key) + + for backend in cls._backends: + raise_if_package_not_available(backend) + + +class VersionComparison(Enum): + EQUAL = operator.eq + NOT_EQUAL = operator.ne + GREATER_THAN = operator.gt + LESS_THAN = operator.lt + GREATER_THAN_OR_EQUAL = operator.ge + LESS_THAN_OR_EQUAL = operator.le + + @staticmethod + def from_string(version_string: str) -> Callable[[int | Version, int | Version], bool]: + string_to_operator = { + "=": VersionComparison.EQUAL.value, + "==": VersionComparison.EQUAL.value, + "!=": VersionComparison.NOT_EQUAL.value, + ">": VersionComparison.GREATER_THAN.value, + "<": VersionComparison.LESS_THAN.value, + ">=": VersionComparison.GREATER_THAN_OR_EQUAL.value, + "<=": VersionComparison.LESS_THAN_OR_EQUAL.value, + } + + return string_to_operator[version_string] + + +@lru_cache +def split_package_version(package_version_str) -> tuple[str, str, str]: + pattern = r"([a-zA-Z0-9_-]+)([!<>=~]+)([0-9.]+)" + match = re.match(pattern, package_version_str) + if match: + return (match.group(1), match.group(2), match.group(3)) + else: + raise ValueError(f"Invalid package version string: {package_version_str}") + + +def requires(*backends): + """ + Decorator to raise an ImportError if the decorated object (function or class) requires a dependency + which is not installed. + """ + + applied_backends = [] + for backend in backends: + applied_backends.append(Requirement(backend.value if isinstance(backend, Extras) else backend)) + + def inner_fn(_object): + _object._backends = applied_backends + + if inspect.isclass(_object): + + class Placeholder(metaclass=DummyObject): + _backends = applied_backends + + def __init__(self, *args, **kwargs): + for backend in self._backends: + raise_if_package_not_available(backend.name, object_name=_object.__class__.__name__) + + Placeholder.__name__ = _object.__name__ + Placeholder.__module__ = _object.__module__ + + return Placeholder + else: + + @functools.wraps(_object) + def wrapper(*args, **kwargs): + for backend in _object._backends: + raise_if_package_not_available(backend.name, object_name=_object.__name__) + return _object(*args, **kwargs) - return wrapper + return wrapper - return decorator + return inner_fn diff --git a/src/lighteval/utils/parallelism.py b/src/lighteval/utils/parallelism.py index 896183160..cedf1a07a 100644 --- a/src/lighteval/utils/parallelism.py +++ b/src/lighteval/utils/parallelism.py @@ -121,9 +121,6 @@ def test_all_gather(accelerator=None, parallel_context=None): Args: accelerator (Optional): The accelerator object used for parallelism. parallel_context (Optional): The parallel context object used for parallelism. - - Raises: - ImportError: If the required accelerator or parallel context is not available. 
""" if accelerator: raise_if_package_not_available("accelerate") From 9dee2ef65f09807ef3bc7e1157ec291b26d9f312 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Tue, 16 Sep 2025 13:57:45 +0200 Subject: [PATCH 3/7] Simplify --- src/lighteval/utils/imports.py | 38 ---------------------------------- 1 file changed, 38 deletions(-) diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index f312dccba..65788ef12 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -15,14 +15,9 @@ import functools import importlib import inspect -import operator -import re -from enum import Enum from functools import lru_cache -from typing import Callable from packaging.requirements import Requirement -from packaging.version import Version class Extras(enum.Enum): @@ -98,39 +93,6 @@ def __getattribute__(cls, key): raise_if_package_not_available(backend) -class VersionComparison(Enum): - EQUAL = operator.eq - NOT_EQUAL = operator.ne - GREATER_THAN = operator.gt - LESS_THAN = operator.lt - GREATER_THAN_OR_EQUAL = operator.ge - LESS_THAN_OR_EQUAL = operator.le - - @staticmethod - def from_string(version_string: str) -> Callable[[int | Version, int | Version], bool]: - string_to_operator = { - "=": VersionComparison.EQUAL.value, - "==": VersionComparison.EQUAL.value, - "!=": VersionComparison.NOT_EQUAL.value, - ">": VersionComparison.GREATER_THAN.value, - "<": VersionComparison.LESS_THAN.value, - ">=": VersionComparison.GREATER_THAN_OR_EQUAL.value, - "<=": VersionComparison.LESS_THAN_OR_EQUAL.value, - } - - return string_to_operator[version_string] - - -@lru_cache -def split_package_version(package_version_str) -> tuple[str, str, str]: - pattern = r"([a-zA-Z0-9_-]+)([!<>=~]+)([0-9.]+)" - match = re.match(pattern, package_version_str) - if match: - return (match.group(1), match.group(2), match.group(3)) - else: - raise ValueError(f"Invalid package version string: {package_version_str}") - - def requires(*backends): """ Decorator to raise an ImportError if the decorated object (function or class) requires a dependency From ac2d18eca061c163ccde0b4e370ec2866029d958 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Tue, 16 Sep 2025 15:44:22 +0200 Subject: [PATCH 4/7] Tests! 
--- .../metrics/imports/data_stats_metric.py | 3 +- src/lighteval/metrics/normalizations.py | 2 + .../metrics/utils/linguistic_tokenizers.py | 5 +- .../tasks/extended/ifbench/instructions.py | 12 ++- src/lighteval/utils/imports.py | 14 +-- tests/test_dependencies.py | 89 +++++++++++++++++++ 6 files changed, 114 insertions(+), 11 deletions(-) create mode 100644 tests/test_dependencies.py diff --git a/src/lighteval/metrics/imports/data_stats_metric.py b/src/lighteval/metrics/imports/data_stats_metric.py index 818c83e0e..bb4e5021e 100644 --- a/src/lighteval/metrics/imports/data_stats_metric.py +++ b/src/lighteval/metrics/imports/data_stats_metric.py @@ -30,7 +30,7 @@ from typing import Literal from lighteval.metrics.imports.data_stats_utils import Fragments -from lighteval.utils.imports import raise_if_package_not_available +from lighteval.utils.imports import Extras, raise_if_package_not_available, requires logger = logging.getLogger(__name__) @@ -55,6 +55,7 @@ def find_ngrams(input_list, n): return zip(*[input_list[i:] for i in range(n)]) +@requires(Extras.MULTILINGUAL) class DataStatsMetric(Metric): def __init__( self, diff --git a/src/lighteval/metrics/normalizations.py b/src/lighteval/metrics/normalizations.py index 98d22b70b..d6261df93 100644 --- a/src/lighteval/metrics/normalizations.py +++ b/src/lighteval/metrics/normalizations.py @@ -28,6 +28,7 @@ from typing import Callable from lighteval.metrics.utils.linguistic_tokenizers import get_word_tokenizer +from lighteval.utils.imports import Extras, requires from lighteval.utils.language import Language @@ -444,6 +445,7 @@ def remove_punc(text: str) -> str: return "".join(ch for ch in text if ch not in PUNCT) +@requires(Extras.MULTILINGUAL) def get_multilingual_normalizer(lang: Language, lower: bool = True) -> Callable[[str], str]: """Get a normalizer function for the specified language. 
diff --git a/src/lighteval/metrics/utils/linguistic_tokenizers.py b/src/lighteval/metrics/utils/linguistic_tokenizers.py index 137ac3417..641095742 100644 --- a/src/lighteval/metrics/utils/linguistic_tokenizers.py +++ b/src/lighteval/metrics/utils/linguistic_tokenizers.py @@ -20,6 +20,7 @@ from lighteval.utils.imports import ( Extras, raise_if_package_not_available, + requires, ) from lighteval.utils.language import Language @@ -97,11 +98,10 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: return list(self.tokenizer.span_tokenize(text)) +@requires(Extras.MULTILINGUAL) class SpaCyTokenizer(WordTokenizer): def __init__(self, spacy_language: str, config=None): super().__init__() - raise_if_package_not_available(Extras.MULTILINGUAL, language=spacy_language) - self.spacy_language = spacy_language self.config = config self._tokenizer = None @@ -135,6 +135,7 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: ] +@requires("stanza") class StanzaTokenizer(WordTokenizer): def __init__(self, stanza_language: str, **stanza_kwargs): super().__init__() diff --git a/src/lighteval/tasks/extended/ifbench/instructions.py b/src/lighteval/tasks/extended/ifbench/instructions.py index ccb5b50da..0c4f0a9a0 100644 --- a/src/lighteval/tasks/extended/ifbench/instructions.py +++ b/src/lighteval/tasks/extended/ifbench/instructions.py @@ -25,8 +25,15 @@ import emoji import nltk -import spacy -import syllapy + +from lighteval.utils.imports import is_package_available, requires + + +if is_package_available("syllapy"): + import syllapy + +if is_package_available("spacy"): + import spacy import lighteval.tasks.extended.ifeval.instructions_utils as instructions_util @@ -61,6 +68,7 @@ RESOURCES_DOWNLOADED: bool = False +@requires("syllapy", "spacy") class Instruction: """An instruction template.""" diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index 65788ef12..fce67c169 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -53,7 +53,9 @@ def is_multilingual_package_available(language: str): def raise_if_package_not_available(package_name: str | Extras, *, language: str = None, object_name: str = None): prefix = "You" if object_name is None else f"Through the use of {object_name}, you" - if package_name == Extras.MULTILINGUAL and not is_multilingual_package_available(language): + if package_name == Extras.MULTILINGUAL and ( + (language is not None) or (not is_multilingual_package_available(language)) + ): raise ImportError(prefix + not_installed_error_message(package_name)[3:]) if not is_package_available(package_name): @@ -61,20 +63,20 @@ def raise_if_package_not_available(package_name: str | Extras, *, language: str def not_installed_error_message(package_name: str | Extras) -> str: - if package_name == Extras.MULTILINGUAL: + if package_name == Extras.MULTILINGUAL.value: return "You are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`" - elif package_name == Extras.EXTENDED: + elif package_name == Extras.EXTENDED.value: return "You are trying to run an evaluation requiring additional extensions. Please install the required extra: `pip install lighteval[extended] " elif package_name == "text_generation": - return "You are trying to start a text generation inference endpoint, but TGI is not present in your local environement. Please install it using pip." 
+ return "You are trying to start a text generation inference endpoint, but TGI is not present in your local environment. Please install it using pip." elif package_name in ["bitsandbytes", "auto-gptq"]: - return f"You are trying to load a model quantized with `{package_name}`, which is not available in your local environement. Please install it using pip." + return f"You are trying to load a model quantized with `{package_name}`, which is not available in your local environment. Please install it using pip." elif package_name == "peft": return "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip." elif package_name == "openai": return "You are trying to use an Open AI LLM as a judge, for which you need `openai`, which is not available in your environment. Please install it using pip." - return f"You requested the use of `{package_name}` for this evaluation, but it is not available in your current environement. Please install it using pip." + return f"You requested the use of `{package_name}` for this evaluation, but it is not available in your current environment. Please install it using pip." class DummyObject(type): diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py new file mode 100644 index 000000000..6be223c67 --- /dev/null +++ b/tests/test_dependencies.py @@ -0,0 +1,89 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# tests/utils/pretend_missing.py +import functools +import importlib + +import pytest + +import lighteval.utils.imports as imports + + +def pretend_missing(*names): + """ + Decorator: pretend that certain packages are missing + by patching mypkg.utils.is_package_available. 
+ """ + + def decorator(test_func): + @functools.wraps(test_func) + def wrapper(*args, **kwargs): + from unittest.mock import patch + + def fake(name): + return False if name in names else (importlib.util.find_spec(name) is not None) + + with patch.object(imports, "is_package_available", side_effect=fake): + # If your module caches results at import time, reload here + import lighteval + + importlib.reload(lighteval) + + return test_func(*args, **kwargs) + + return wrapper + + return decorator + + +@pretend_missing("langdetect") +def test_langdetect_required_for_ifeval(): + from lighteval.main_accelerate import accelerate + + with pytest.raises( + ImportError, + match="Through the use of ifeval_prompt, you requested the use of `langdetect` for this evaluation, but it is not available in your current environment. Please install it using pip.", + ): + accelerate(model_args="model_name=gpt2,batch_size=1", tasks="extended|ifeval|0", max_samples=0) + + +@pretend_missing("spacy", "stanza") +def test_multilingual_required_for_xnli(): + from lighteval.main_accelerate import accelerate + + with pytest.raises( + ImportError, + match="Through the use of get_multilingual_normalizer, you are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`", + ): + accelerate(model_args="model_name=gpt2,batch_size=1", tasks="lighteval|xnli_zho_mcf|0", max_samples=0) + + +@pretend_missing("vllm") +def test_vllm_required_for_vllm_usage(): + from lighteval.main_vllm import vllm + + with pytest.raises( + ImportError, + match="You requested the use of `vllm` for this evaluation, but it is not available in your current environment. Please install it using pip.'", + ): + vllm(model_args="model_name=gpt2,batch_size=1", tasks="lighteval|xnli_zho_mcf|0", max_samples=0) From 2c6f61a52a864e67f96ce0182b9771b9fdcaed73 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Thu, 18 Sep 2025 13:07:48 +0200 Subject: [PATCH 5/7] Update --- pyproject.toml | 3 +- .../metrics/imports/data_stats_metric.py | 5 +- src/lighteval/metrics/normalizations.py | 4 +- .../metrics/utils/linguistic_tokenizers.py | 6 +- src/lighteval/models/endpoints/tgi_model.py | 4 +- src/lighteval/models/model_loader.py | 2 - src/lighteval/pipeline.py | 17 +-- src/lighteval/tasks/registry.py | 13 ++- src/lighteval/utils/imports.py | 104 +++++++++++++----- tests/test_dependencies.py | 12 +- 10 files changed, 106 insertions(+), 64 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 64643f2fb..bbcc33a6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,8 @@ dependencies = [ "fsspec>=2023.12.2", "httpx>=0.27.2", "latex2sympy2_extended==1.0.6", - "langcodes" + "langcodes", + "sglang" ] [project.optional-dependencies] diff --git a/src/lighteval/metrics/imports/data_stats_metric.py b/src/lighteval/metrics/imports/data_stats_metric.py index bb4e5021e..a007bcf5d 100644 --- a/src/lighteval/metrics/imports/data_stats_metric.py +++ b/src/lighteval/metrics/imports/data_stats_metric.py @@ -30,7 +30,7 @@ from typing import Literal from lighteval.metrics.imports.data_stats_utils import Fragments -from lighteval.utils.imports import Extras, raise_if_package_not_available, requires +from lighteval.utils.imports import Extra, requires logger = logging.getLogger(__name__) @@ -55,7 +55,7 @@ def find_ngrams(input_list, n): return zip(*[input_list[i:] for i in range(n)]) -@requires(Extras.MULTILINGUAL) +@requires(Extra.MULTILINGUAL) class DataStatsMetric(Metric): def __init__( self, @@ 
-87,7 +87,6 @@ def __init__( determines the spaCy model used for tokenization. Currently supports English, German, French, and Italian. """ - raise_if_package_not_available("spacy") import spacy self.n_gram = n_gram diff --git a/src/lighteval/metrics/normalizations.py b/src/lighteval/metrics/normalizations.py index d6261df93..ef55681b1 100644 --- a/src/lighteval/metrics/normalizations.py +++ b/src/lighteval/metrics/normalizations.py @@ -28,7 +28,7 @@ from typing import Callable from lighteval.metrics.utils.linguistic_tokenizers import get_word_tokenizer -from lighteval.utils.imports import Extras, requires +from lighteval.utils.imports import Extra, requires from lighteval.utils.language import Language @@ -445,7 +445,7 @@ def remove_punc(text: str) -> str: return "".join(ch for ch in text if ch not in PUNCT) -@requires(Extras.MULTILINGUAL) +@requires(Extra.MULTILINGUAL) def get_multilingual_normalizer(lang: Language, lower: bool = True) -> Callable[[str], str]: """Get a normalizer function for the specified language. diff --git a/src/lighteval/metrics/utils/linguistic_tokenizers.py b/src/lighteval/metrics/utils/linguistic_tokenizers.py index 641095742..a5670a268 100644 --- a/src/lighteval/metrics/utils/linguistic_tokenizers.py +++ b/src/lighteval/metrics/utils/linguistic_tokenizers.py @@ -18,8 +18,7 @@ from typing import Callable, Iterator from lighteval.utils.imports import ( - Extras, - raise_if_package_not_available, + Extra, requires, ) from lighteval.utils.language import Language @@ -98,7 +97,7 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: return list(self.tokenizer.span_tokenize(text)) -@requires(Extras.MULTILINGUAL) +@requires(Extra.MULTILINGUAL) class SpaCyTokenizer(WordTokenizer): def __init__(self, spacy_language: str, config=None): super().__init__() @@ -139,7 +138,6 @@ def span_tokenize(self, text: str) -> list[tuple[int, int]]: class StanzaTokenizer(WordTokenizer): def __init__(self, stanza_language: str, **stanza_kwargs): super().__init__() - raise_if_package_not_available("stanza") self.stanza_language = stanza_language self.stanza_kwargs = stanza_kwargs self._tokenizer = None diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 4b4847fe9..46ddfce5a 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -32,7 +32,7 @@ from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel from lighteval.tasks.prompt_manager import PromptManager from lighteval.utils.cache_management import SampleCache -from lighteval.utils.imports import is_package_available, requires +from lighteval.utils.imports import Extra, is_package_available, requires if is_package_available("tgi"): @@ -99,7 +99,7 @@ class TGIModelConfig(ModelConfig): # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite # the client functions, since they use a different client. 
-@requires("tgi") +@requires(Extra.TGI) class ModelClient(InferenceEndpointModel): _DEFAULT_MAX_LENGTH: int = 4096 diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py index fce7ff1af..e3c710edd 100644 --- a/src/lighteval/models/model_loader.py +++ b/src/lighteval/models/model_loader.py @@ -43,7 +43,6 @@ from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModel, VLMTransformersModelConfig from lighteval.models.vllm.vllm_model import AsyncVLLMModel, VLLMModel, VLLMModelConfig -from lighteval.utils.imports import raise_if_package_not_available logger = logging.getLogger(__name__) @@ -148,7 +147,6 @@ def load_model_with_accelerate_or_default( elif isinstance(config, DeltaModelConfig): model = DeltaModel(config=config) elif isinstance(config, VLLMModelConfig): - raise_if_package_not_available("vllm") if config.is_async: model = AsyncVLLMModel(config=config) else: diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index 1416ce7c8..fab0ea7d4 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -42,7 +42,7 @@ from lighteval.tasks.lighteval_task import LightevalTask from lighteval.tasks.registry import Registry from lighteval.tasks.requests import SamplingMethod -from lighteval.utils.imports import is_package_available, raise_if_package_not_available +from lighteval.utils.imports import is_package_available from lighteval.utils.parallelism import test_all_gather from lighteval.utils.utils import make_results_table, remove_reasoning_tags @@ -96,21 +96,6 @@ class PipelineParameters: bootstrap_iters: int = 1000 def __post_init__(self): # noqa C901 - # Import testing - if self.launcher_type == ParallelismManager.ACCELERATE: - raise_if_package_not_available("accelerate") - elif self.launcher_type == ParallelismManager.VLLM: - raise_if_package_not_available("vllm") - elif self.launcher_type == ParallelismManager.SGLANG: - raise_if_package_not_available("sglang") - elif self.launcher_type == ParallelismManager.TGI: - raise_if_package_not_available("tgi") - elif self.launcher_type == ParallelismManager.NANOTRON: - raise_if_package_not_available("nanotron") - elif self.launcher_type == ParallelismManager.OPENAI: - raise_if_package_not_available("openai") - - # Convert reasoning tags to list if needed if not isinstance(self.reasoning_tags, list): try: self.reasoning_tags = ast.literal_eval(self.reasoning_tags) diff --git a/src/lighteval/tasks/registry.py b/src/lighteval/tasks/registry.py index 51e0ba377..95914991c 100644 --- a/src/lighteval/tasks/registry.py +++ b/src/lighteval/tasks/registry.py @@ -115,9 +115,9 @@ def __init__( self, tasks: str | Path | None = None, custom_tasks: str | Path | ModuleType | None = None, - load_community: bool = True, - load_extended: bool = True, - load_multilingual: bool = True, + load_community: bool = False, + load_extended: bool = False, + load_multilingual: bool = False, ): """ Initialize the Registry class. @@ -213,6 +213,13 @@ def _activate_loading_of_optional_suite(self) -> None: f"Suite {suite_name} unknown. This is not normal, unless you are testing adding new evaluations." 
) + if "extended" in suites: + self._load_extended = True + if "multilingual" in suites: + self._load_multilingual = True + if "community" in suites: + self._load_community = True + def _load_full_registry(self) -> dict[str, LightevalTaskConfig]: """ Returns: diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index fce67c169..d7e2f4357 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -15,24 +15,64 @@ import functools import importlib import inspect +import re +from collections import defaultdict from functools import lru_cache +from importlib.metadata import PackageNotFoundError, metadata, version +from typing import Dict, List, Tuple from packaging.requirements import Requirement +from packaging.version import Version -class Extras(enum.Enum): +# These extras should exist in the pyproject.toml file +class Extra(enum.Enum): MULTILINGUAL = "multilingual" EXTENDED = "extended" + TGI = "tgi" @lru_cache() -def is_package_available(package_name: str | Extras): - if package_name == Extras.MULTILINGUAL: - return all(importlib.util.find_spec(package) is not None for package in ["stanza", "spacy"]) - if package_name == Extras.EXTENDED: - return all(importlib.util.find_spec(package) is not None for package in ["spacy"]) +def is_package_available(package: str | Requirement | Extra): + deps, deps_by_extra = required_dependencies() + + if isinstance(package, str): + package = deps[package] + + if isinstance(package, Extra): + dependencies = deps_by_extra[package.value] + return all(is_package_available(_package) for _package in dependencies) else: - return importlib.util.find_spec(package_name) is not None + try: + installed = Version(version(package.name)) + except PackageNotFoundError: + return False + + # No version constraint → any installed version is OK + if not package.specifier: + return True + + return installed in package.specifier + + +@lru_cache() +def required_dependencies() -> Tuple[Dict[str, Requirement], Dict[str, List[Requirement]]]: + md = metadata("lighteval") + requires_dist = md.get_all("Requires-Dist") or [] + deps_by_extra = defaultdict(list) + deps = {} + + for dep in requires_dist: + extra = None + if ";" in dep: + dep, marker = dep.split(";", 1) + match = re.search(r'extra\s*==\s*"(.*?)"', marker) + extra = match.group(1) if match else None + requirement = Requirement(dep.strip()) + deps_by_extra[extra].append(requirement) + deps[requirement.name] = requirement + + return deps, deps_by_extra @lru_cache() @@ -50,33 +90,32 @@ def is_multilingual_package_available(language: str): return all(cur_import is not None for cur_import in imports) -def raise_if_package_not_available(package_name: str | Extras, *, language: str = None, object_name: str = None): +def raise_if_package_not_available(package: Requirement | Extra, *, language: str = None, object_name: str = None): prefix = "You" if object_name is None else f"Through the use of {object_name}, you" - if package_name == Extras.MULTILINGUAL and ( - (language is not None) or (not is_multilingual_package_available(language)) - ): - raise ImportError(prefix + not_installed_error_message(package_name)[3:]) + if package == Extra.MULTILINGUAL and ((language is not None) or (not is_multilingual_package_available(language))): + raise ImportError(prefix + not_installed_error_message(package)[3:]) - if not is_package_available(package_name): - raise ImportError(prefix + not_installed_error_message(package_name)[3:]) + if not is_package_available(package): + raise 
ImportError(prefix + not_installed_error_message(package)[3:]) -def not_installed_error_message(package_name: str | Extras) -> str: - if package_name == Extras.MULTILINGUAL.value: +def not_installed_error_message(package: Requirement) -> str: + if package == Extra.MULTILINGUAL.value: return "You are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`" - elif package_name == Extras.EXTENDED.value: + elif package == Extra.EXTENDED.value: return "You are trying to run an evaluation requiring additional extensions. Please install the required extra: `pip install lighteval[extended] " - elif package_name == "text_generation": + elif package == "text_generation": return "You are trying to start a text generation inference endpoint, but TGI is not present in your local environment. Please install it using pip." - elif package_name in ["bitsandbytes", "auto-gptq"]: - return f"You are trying to load a model quantized with `{package_name}`, which is not available in your local environment. Please install it using pip." - elif package_name == "peft": + elif package == "peft": return "You are trying to use adapter weights models, for which you need `peft`, which is not available in your environment. Please install it using pip." - elif package_name == "openai": + elif package == "openai": return "You are trying to use an Open AI LLM as a judge, for which you need `openai`, which is not available in your environment. Please install it using pip." - return f"You requested the use of `{package_name}` for this evaluation, but it is not available in your current environment. Please install it using pip." + if isinstance(package, Extra): + return f"You are trying to run an evaluation requiring {package.value} capabilities. Please install the required extra: `pip install lighteval[{package.value}]`" + else: + return f"You requested the use of `{package}` for this evaluation, but it is not available in your current environment. Please install it using pip." class DummyObject(type): @@ -101,9 +140,22 @@ def requires(*backends): which is not installed. """ + requirements, _ = required_dependencies() + applied_backends = [] for backend in backends: - applied_backends.append(Requirement(backend.value if isinstance(backend, Extras) else backend)) + if isinstance(backend, Extra): + applied_backends.append(backend) + else: + if backend not in requirements: + raise RuntimeError( + "A dependency was specified with @requires, but it is not defined in the possible dependencies " + f"defined in the pyproject.toml: `{backend}`." + f"" + f"If editing the pyproject.toml to add a new dependency, remember to reinstall lighteval for the" + f"update to take effect." 
+ ) + applied_backends.append(requirements[backend]) def inner_fn(_object): _object._backends = applied_backends @@ -115,7 +167,7 @@ class Placeholder(metaclass=DummyObject): def __init__(self, *args, **kwargs): for backend in self._backends: - raise_if_package_not_available(backend.name, object_name=_object.__class__.__name__) + raise_if_package_not_available(backend, object_name=_object.__name__) Placeholder.__name__ = _object.__name__ Placeholder.__module__ = _object.__module__ @@ -126,7 +178,7 @@ def __init__(self, *args, **kwargs): @functools.wraps(_object) def wrapper(*args, **kwargs): for backend in _object._backends: - raise_if_package_not_available(backend.name, object_name=_object.__name__) + raise_if_package_not_available(backend, object_name=_object.__name__) return _object(*args, **kwargs) return wrapper diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index 6be223c67..e3f3908d6 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -25,6 +25,7 @@ import importlib import pytest +from packaging.requirements import Requirement import lighteval.utils.imports as imports @@ -40,7 +41,8 @@ def decorator(test_func): def wrapper(*args, **kwargs): from unittest.mock import patch - def fake(name): + def fake(requirement): + name = requirement.name if isinstance(requirement, Requirement) else requirement return False if name in names else (importlib.util.find_spec(name) is not None) with patch.object(imports, "is_package_available", side_effect=fake): @@ -73,9 +75,9 @@ def test_multilingual_required_for_xnli(): with pytest.raises( ImportError, - match="Through the use of get_multilingual_normalizer, you are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`", + match="Through the use of get_multilingual_normalizer, you are trying to run an evaluation requiring multilingual capabilities.", ): - accelerate(model_args="model_name=gpt2,batch_size=1", tasks="lighteval|xnli_zho_mcf|0", max_samples=0) + accelerate(model_args="model_name=gpt2,batch_size=1", tasks="multilingual|xnli_zho_mcf|0", max_samples=0) @pretend_missing("vllm") @@ -84,6 +86,6 @@ def test_vllm_required_for_vllm_usage(): with pytest.raises( ImportError, - match="You requested the use of `vllm` for this evaluation, but it is not available in your current environment. Please install it using pip.'", + match="Through the use of VLLMModel, you requested the use of `vllm<0.10.2,>=0.10.0` for this evaluation, but it is not available in your current environment. Please install it using pip.", ): - vllm(model_args="model_name=gpt2,batch_size=1", tasks="lighteval|xnli_zho_mcf|0", max_samples=0) + vllm(model_args="model_name=gpt2", tasks="lighteval|aime24|0", max_samples=0) From e2fe723130475d852cc31b7c2086569d960f6aa3 Mon Sep 17 00:00:00 2001 From: Lysandre Date: Thu, 18 Sep 2025 13:11:52 +0200 Subject: [PATCH 6/7] Docs --- src/lighteval/utils/imports.py | 13 +++++++++++++ tests/test_dependencies.py | 3 +++ 2 files changed, 16 insertions(+) diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index d7e2f4357..d11696fa6 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -34,11 +34,17 @@ class Extra(enum.Enum): @lru_cache() def is_package_available(package: str | Requirement | Extra): + """ + Check if a package is installed in the environment. Returns True if that's the case, False otherwise. 
+    """
     deps, deps_by_extra = required_dependencies()
 
+    # If the package is a string, it will get the potential required version from the pyproject.toml
     if isinstance(package, str):
         package = deps[package]
 
+    # If the specified package is an "Extra", we will iterate through each required dependency of that extra
+    # and their version and check their existence.
     if isinstance(package, Extra):
         dependencies = deps_by_extra[package.value]
         return all(is_package_available(_package) for _package in dependencies)
@@ -57,6 +63,9 @@ def is_package_available(package: str | Requirement | Extra):
 
 @lru_cache()
 def required_dependencies() -> Tuple[Dict[str, Requirement], Dict[str, List[Requirement]]]:
+    """
+    Parse the installed lighteval metadata and return two mappings: package name to requirement, and extra name to its requirements.
+    """
     md = metadata("lighteval")
     requires_dist = md.get_all("Requires-Dist") or []
     deps_by_extra = defaultdict(list)
@@ -101,6 +110,10 @@ def raise_if_package_not_available(package: Requirement | Extra, *, language: st
 
 
 def not_installed_error_message(package: Requirement) -> str:
+    """
+    Return a package-specific installation hint when one exists, and a generic message otherwise.
+    """
+
     if package == Extra.MULTILINGUAL.value:
         return "You are trying to run an evaluation requiring multilingual capabilities. Please install the required extra: `pip install lighteval[multilingual]`"
     elif package == Extra.EXTENDED.value:
diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py
index e3f3908d6..4b4f23f65 100644
--- a/tests/test_dependencies.py
+++ b/tests/test_dependencies.py
@@ -71,6 +71,9 @@ def test_langdetect_required_for_ifeval():
 
 @pretend_missing("spacy", "stanza")
 def test_multilingual_required_for_xnli():
+    """
+    This checks that Extra.MULTILINGUAL correctly raises if dependencies are missing.
+    """
     from lighteval.main_accelerate import accelerate
 
     with pytest.raises(
From 5c79e2bf006f995b4dcdaf8c239239a75ab36306 Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Thu, 18 Sep 2025 13:15:30 +0200
Subject: [PATCH 7/7] Final fixes

---
 pyproject.toml                               |  2 +-
 src/lighteval/models/endpoints/tgi_model.py  |  5 +-
 src/lighteval/models/sglang/sglang_model.py  |  4 +-
 src/lighteval/utils/imports.py               | 51 +++++++----
 tests/test_dependencies.py                   | 95 +++++++++-----------
 tests/unit/models/test_transformers_model.py |  4 +-
 tests/unit/utils/test_caching.py             | 16 +---
 7 files changed, 86 insertions(+), 91 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index bbcc33a6e..24631db9a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,6 @@ dependencies = [
     "httpx>=0.27.2",
     "latex2sympy2_extended==1.0.6",
     "langcodes",
-    "sglang"
 ]
 
 [project.optional-dependencies]
@@ -100,6 +99,7 @@ nanotron = [
 ]
 tensorboardX = ["tensorboardX"]
 vllm = ["vllm>=0.10.0,<0.10.2", "ray", "more_itertools"]
+sglang = ["sglang"]
 quality = ["ruff>=v0.11.0","pre-commit"]
 tests = ["pytest>=7.4.0","deepdiff","pip>=25.2"]
 dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"]
diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py
index 46ddfce5a..4fd765b8d 100644
--- a/src/lighteval/models/endpoints/tgi_model.py
+++ b/src/lighteval/models/endpoints/tgi_model.py
@@ -35,7 +35,7 @@
 from lighteval.utils.imports import Extra, is_package_available, requires
 
 
-if is_package_available("tgi"):
+if is_package_available(Extra.TGI):
     from text_generation import AsyncClient
 else:
     from unittest.mock import Mock
@@ -99,7 +99,6 @@ class TGIModelConfig(ModelConfig):
 
 # inherit from InferenceEndpointModel instead of LightevalModel since
they both use the same interface, and only overwrite
 # the client functions, since they use a different client.
-@requires(Extra.TGI)
 class ModelClient(InferenceEndpointModel):
     _DEFAULT_MAX_LENGTH: int = 4096
 
@@ -134,6 +133,7 @@ def __init__(self, config: TGIModelConfig) -> None:
         # Initialize cache for tokenization and predictions
         self._cache = SampleCache(config)
 
+    @requires(Extra.TGI)
     def _async_process_request(
         self,
         context: str,
@@ -173,6 +173,7 @@ def _async_process_request(
 
         return generated_text
 
+    @requires(Extra.TGI)
     def _process_request(self, *args, **kwargs) -> TextGenerationOutput:
         return asyncio.run(self._async_process_request(*args, **kwargs))
 
diff --git a/src/lighteval/models/sglang/sglang_model.py b/src/lighteval/models/sglang/sglang_model.py
index 220d5159b..e5c0f4d87 100644
--- a/src/lighteval/models/sglang/sglang_model.py
+++ b/src/lighteval/models/sglang/sglang_model.py
@@ -138,7 +138,6 @@ class SGLangModelConfig(ModelConfig):
     override_chat_template: bool = None
 
 
-@requires("sglang")
 class SGLangModel(LightevalModel):
     def __init__(
         self,
@@ -187,7 +186,7 @@ def add_special_tokens(self):
     def max_length(self) -> int:
         return self._max_length
 
-    def _create_auto_model(self, config: SGLangModelConfig) -> Optional[Engine]:
+    def _create_auto_model(self, config: SGLangModelConfig) -> Optional["Engine"]:
         self.model_args = {
             "model_path": config.model_name,
             "trust_remote_code": config.trust_remote_code,
@@ -314,6 +313,7 @@ def _greedy_until(
             results.append(cur_response)
         return dataset.get_original_order(results)
 
+    @requires("sglang")
     def _generate(
         self,
         inputs: list[list[int]],
diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py
index d11696fa6..048526cbc 100644
--- a/src/lighteval/utils/imports.py
+++ b/src/lighteval/utils/imports.py
@@ -41,6 +41,11 @@ def is_package_available(package: str | Requirement | Extra):
 
     # If the package is a string, it will get the potential required version from the pyproject.toml
     if isinstance(package, str):
+        if package not in deps:
+            raise RuntimeError(
+                f"Package {package} was tested against, but isn't specified in the pyproject.toml file. Please specify "
+                "it as a potential dependency or an extra for it to be checked."
+            )
         package = deps[package]
 
     # If the specified package is an "Extra", we will iterate through each required dependency of that extra
@@ -75,6 +80,10 @@ def required_dependencies() -> Tuple[Dict[str, Requirement], Dict[str, List[Requ
         extra = None
         if ";" in dep:
             dep, marker = dep.split(";", 1)
+
+            # The `metadata` function prints requirements as follows
+            # 'vllm<0.10.2,>=0.10.0; extra == "vllm"'
+            # The regex searches for "extra == " in order to parse the marker.
             match = re.search(r'extra\s*==\s*"(.*?)"', marker)
             extra = match.group(1) if match else None
             requirement = Requirement(dep.strip())
@@ -146,30 +155,38 @@ def __getattribute__(cls, key):
         for backend in cls._backends:
             raise_if_package_not_available(backend)
 
+        return super().__getattribute__(key)
 
 
-def requires(*backends):
-    """
-    Decorator to raise an ImportError if the decorated object (function or class) requires a dependency
-    which is not installed.
- """
+def parse_specified_backends(specified_backends):
     requirements, _ = required_dependencies()
-    applied_backends = []
-    for backend in backends:
+    applied_backends = []
+
+    for backend in specified_backends:
         if isinstance(backend, Extra):
-            applied_backends.append(backend)
+            applied_backends.append(backend if isinstance(backend, Extra) else requirements[backend])
+        elif backend not in requirements:
+            raise RuntimeError(
+                "A dependency was specified with @requires, but it is not defined in the possible dependencies "
+                f"defined in the pyproject.toml: `{backend}`. "
+                "If editing the pyproject.toml to add a new dependency, "
+                "remember to reinstall lighteval for the "
+                "update to take effect."
+            )
         else:
-            if backend not in requirements:
-                raise RuntimeError(
-                    "A dependency was specified with @requires, but it is not defined in the possible dependencies "
-                    f"defined in the pyproject.toml: `{backend}`."
-                    f""
-                    f"If editing the pyproject.toml to add a new dependency, remember to reinstall lighteval for the"
-                    f"update to take effect."
-                )
             applied_backends.append(requirements[backend])
 
+    return applied_backends
+
+
+def requires(*specified_backends):
+    """
+    Decorator to raise an ImportError if the decorated object (function or class) requires a dependency
+    which is not installed.
+    """
+
+    applied_backends = parse_specified_backends(specified_backends)
+
     def inner_fn(_object):
         _object._backends = applied_backends
 
@@ -185,7 +202,7 @@ def __init__(self, *args, **kwargs):
         Placeholder.__name__ = _object.__name__
         Placeholder.__module__ = _object.__module__
 
-        return Placeholder
+        return _object if all(is_package_available(backend) for backend in applied_backends) else Placeholder
     else:
 
         @functools.wraps(_object)
diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py
index 4b4f23f65..694e5ec47 100644
--- a/tests/test_dependencies.py
+++ b/tests/test_dependencies.py
@@ -1,17 +1,17 @@
 # MIT License
-
+#
 # Copyright (c) 2024 The HuggingFace Team
-
+#
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -20,75 +20,60 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-# tests/utils/pretend_missing.py
-import functools
-import importlib
-
 import pytest
-from packaging.requirements import Requirement
-
-import lighteval.utils.imports as imports
-
-
-def pretend_missing(*names):
-    """
-    Decorator: pretend that certain packages are missing
-    by patching mypkg.utils.is_package_available.
- """ - - def decorator(test_func): - @functools.wraps(test_func) - def wrapper(*args, **kwargs): - from unittest.mock import patch - - def fake(requirement): - name = requirement.name if isinstance(requirement, Requirement) else requirement - return False if name in names else (importlib.util.find_spec(name) is not None) - with patch.object(imports, "is_package_available", side_effect=fake): - # If your module caches results at import time, reload here - import lighteval +from lighteval.utils.imports import Extra, is_package_available, requires - importlib.reload(lighteval) - return test_func(*args, **kwargs) +def test_requires(): + @requires("sglang") + class RandomModel: + pass - return wrapper + assert RandomModel.__name__ == "RandomModel" + assert RandomModel.__class__.__name__ == "DummyObject" - return decorator + with pytest.raises( + ImportError, + match="Through the use of RandomModel, you requested the use of `sglang` for this evaluation, but it is not available in your current environment. Please install it using pip.", + ): + RandomModel() -@pretend_missing("langdetect") -def test_langdetect_required_for_ifeval(): - from lighteval.main_accelerate import accelerate +def test_requires_with_extra(): + @requires(Extra.TGI) + class RandomModel: + pass with pytest.raises( ImportError, - match="Through the use of ifeval_prompt, you requested the use of `langdetect` for this evaluation, but it is not available in your current environment. Please install it using pip.", + match=r"Through the use of RandomModel, you are trying to run an evaluation requiring tgi capabilities. Please install the required extra: `pip install lighteval\[tgi\]`", ): - accelerate(model_args="model_name=gpt2,batch_size=1", tasks="extended|ifeval|0", max_samples=0) + RandomModel() -@pretend_missing("spacy", "stanza") -def test_multilingual_required_for_xnli(): - """ - This checks that the Extra.MULTILINGUAL correctly raises if there are missing dependencies. - """ - from lighteval.main_accelerate import accelerate - +def test_requires_with_wrong_dependency(): with pytest.raises( - ImportError, - match="Through the use of get_multilingual_normalizer, you are trying to run an evaluation requiring multilingual capabilities.", + RuntimeError, + match="A dependency was specified with @requires, but it is not defined in the possible dependencies defined in the pyproject.toml: `random_dependency`", ): - accelerate(model_args="model_name=gpt2,batch_size=1", tasks="multilingual|xnli_zho_mcf|0", max_samples=0) + @requires("random_dependency") + class RandomModel: + pass -@pretend_missing("vllm") -def test_vllm_required_for_vllm_usage(): - from lighteval.main_vllm import vllm +def test_is_package_available(): + assert is_package_available("torch") + + +def test_is_package_unavailable(): + assert not is_package_available("tensorboardX") + + +def test_is_package_is_not_specified_in_pyproject_toml(): with pytest.raises( - ImportError, - match="Through the use of VLLMModel, you requested the use of `vllm<0.10.2,>=0.10.0` for this evaluation, but it is not available in your current environment. Please install it using pip.", + RuntimeError, + match="Package tensorflow was tested against, but isn't specified in the pyproject.toml file. 
Please specify it as a potential dependency or an extra for it to be checked.",
     ):
         is_package_available("tensorflow")
diff --git a/tests/unit/models/test_transformers_model.py b/tests/unit/models/test_transformers_model.py
index 1cb2cf230..da7c925ae 100644
--- a/tests/unit/models/test_transformers_model.py
+++ b/tests/unit/models/test_transformers_model.py
@@ -398,9 +398,9 @@ def mock_gather(tensor):
 class TestTransformersModelUseChatTemplate(unittest.TestCase):
     @patch("lighteval.models.transformers.transformers_model.Accelerator")
     @patch("lighteval.models.transformers.transformers_model.TransformersModel._create_auto_model")
-    @patch("lighteval.utils.imports.is_accelerate_available")
+    @patch("lighteval.utils.imports.is_package_available")
     def test_transformers_model_use_chat_template_with_different_model_names(
-        self, mock_accelerator, mock_create_model, is_accelerate_available
+        self, mock_accelerator, mock_create_model, is_package_available
     ):
         """Test that TransformersModel correctly determines whether to use_chat_template or not automatically from the tokenizer config."""
         test_cases = [
diff --git a/tests/unit/utils/test_caching.py b/tests/unit/utils/test_caching.py
index 1fc3595e0..7ab8644be 100644
--- a/tests/unit/utils/test_caching.py
+++ b/tests/unit/utils/test_caching.py
@@ -32,6 +32,7 @@
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.requests import Doc, SamplingMethod
 from lighteval.utils.cache_management import SampleCache
+from lighteval.utils.imports import Extra, is_package_available
 
 
 class TestCaching(unittest.TestCase):
@@ -177,12 +178,9 @@ def _test_cache(self, model: LightevalModel, test_cases):
 
     @patch("lighteval.models.transformers.transformers_model.TransformersModel._loglikelihood_tokens")
     @patch("lighteval.models.transformers.transformers_model.TransformersModel._padded_greedy_until")
-    @patch("lighteval.utils.imports.is_accelerate_available")
     @patch("lighteval.models.transformers.transformers_model.Accelerator")
     @patch("lighteval.models.transformers.transformers_model.TransformersModel._create_auto_model")
-    def test_cache_transformers(
-        self, mock_create_model, mock_accelerator, mock_is_accelerate_available, mock_greedy_until, mock_loglikelihood
-    ):
+    def test_cache_transformers(self, mock_create_model, mock_accelerator, mock_greedy_until, mock_loglikelihood):
         from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig
 
         # Skip the model creation phase
@@ -192,7 +190,6 @@ def test_cache_transformers(
         mock_accelerator_instance = Mock()
         mock_accelerator_instance.device = torch.device("cpu")
         mock_accelerator.return_value = mock_accelerator_instance
-        mock_is_accelerate_available = False  # noqa F841
 
         mock_greedy_until.return_value = self.model_responses
         mock_loglikelihood.return_value = self.model_responses
@@ -237,9 +234,8 @@ def test_cache_vllm(self, mock_create_model, mock_greedy_until, mock_loglikeliho
     @patch("lighteval.models.endpoints.tgi_model.ModelClient._loglikelihood")
     def test_cache_tgi(self, mock_loglikelihood, mock_greedy_until, mock_requests_get):
         from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig
-        from lighteval.utils.imports import is_package_available
 
-        if not is_package_available("tgi"):
+        if not is_package_available(Extra.TGI):
             pytest.skip("Skipping because missing the imports")
 
         # Mock TGI requests
@@ -320,12 +316,9 @@ def test_cache_sglang(
     )
@patch("lighteval.models.transformers.vlm_transformers_model.VLMTransformersModel._greedy_until") - @patch("lighteval.utils.imports.is_accelerate_available") @patch("lighteval.models.transformers.vlm_transformers_model.Accelerator") @patch("lighteval.models.transformers.vlm_transformers_model.VLMTransformersModel._create_auto_model") - def test_cache_vlm_transformers( - self, mock_create_model, mock_accelerator, is_accelerate_available, mock_greedy_until - ): + def test_cache_vlm_transformers(self, mock_create_model, mock_accelerator, mock_greedy_until): from lighteval.models.transformers.vlm_transformers_model import ( VLMTransformersModel, VLMTransformersModelConfig, @@ -335,7 +328,6 @@ def test_cache_vlm_transformers( mock_accelerator_instance = Mock() mock_accelerator_instance.device = torch.device("cpu") mock_accelerator.return_value = mock_accelerator_instance - is_accelerate_available = False # noqa F841 # Skip the model creation phase mock_create_model = Mock() # noqa F841