From a1aca362f13dd7e91e2b649308c3fb4590d19ed3 Mon Sep 17 00:00:00 2001 From: Abhishek Divekar Date: Thu, 13 Feb 2025 20:04:13 +0530 Subject: [PATCH] Made fmcore compatible with Numpy v1.20+ and v2.0+ for vLLM Also improved messaging. --- src/fmcore/framework/_evaluator/RayEvaluator.py | 4 ++-- src/fmcore/framework/_task/classification.py | 3 ++- src/fmcore/framework/_trainer/RayTuneTrainer.py | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/fmcore/framework/_evaluator/RayEvaluator.py b/src/fmcore/framework/_evaluator/RayEvaluator.py index 7aa13d6..33ede37 100644 --- a/src/fmcore/framework/_evaluator/RayEvaluator.py +++ b/src/fmcore/framework/_evaluator/RayEvaluator.py @@ -649,7 +649,7 @@ def _run_evaluation( progress_bar, total=num_actors_created, desc=f"Submitting {input_len_str} rows", - unit="submissions", + unit="batch", ) ## Each actor streams data from Dask dataframe on the cluster: if not isinstance(data, DaskScalableDataFrame): @@ -693,7 +693,7 @@ def _run_evaluation( progress_bar, total=math.ceil(input_len / submission_batch_size), desc=f"Submitting {input_len_str} rows", - unit="submissions", + unit="batch", ) ## Initialize to zero: rows_completed: int = ray.get(row_counter.get_rows_completed.remote()) diff --git a/src/fmcore/framework/_task/classification.py b/src/fmcore/framework/_task/classification.py index b859f12..88dd84d 100644 --- a/src/fmcore/framework/_task/classification.py +++ b/src/fmcore/framework/_task/classification.py @@ -17,6 +17,7 @@ from bears import ScalableDataFrame, ScalableSeries from bears.processor import EncodingRange, LabelEncoding from bears.util import all_are_np_subtypes, as_list, as_tuple, is_list_or_set_like, safe_validate_arguments +from bears.util.language._import import np_bool, np_floating, np_integer from pydantic import constr from fmcore.constants import DataLayout, DataSplit, MLType, MLTypeSchema, Task @@ -268,7 +269,7 @@ def from_top_k( labels: np.ndarray = np.array(labels) if isinstance(scores, (list, tuple)): scores: np.ndarray = np.array(scores) - if not all_are_np_subtypes(scores.dtype, {np.bool, np.integer, np.floating}): + if not all_are_np_subtypes(scores.dtype, {np_bool, np_integer, np_floating}): raise ValueError( f"Expected scores array to have dtype as bool, int or float; found: {scores.dtype}" ) diff --git a/src/fmcore/framework/_trainer/RayTuneTrainer.py b/src/fmcore/framework/_trainer/RayTuneTrainer.py index 07055c0..8ccfc65 100644 --- a/src/fmcore/framework/_trainer/RayTuneTrainer.py +++ b/src/fmcore/framework/_trainer/RayTuneTrainer.py @@ -29,8 +29,8 @@ set_param_from_alias, type_str, ) -from bears.util.language._import import _IS_RAY_INSTALLED, _IS_TORCH_INSTALLED -from pydantic import conint, model_validator, ConfigDict +from bears.util.language._import import _IS_RAY_INSTALLED, _IS_TORCH_INSTALLED, np_number +from pydantic import ConfigDict, conint, model_validator from fmcore.constants import DataLayout from fmcore.framework._algorithm import Algorithm @@ -93,7 +93,7 @@ def _ray_put_metric_value(metric: Metric) -> Any: assert isinstance(metric.value, pd.DataFrame) return _RAY_METRIC_IS_DATAFRAME_PREFIX + metric.value.to_json(orient="records") value: Any = metric.value - if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np.number): + if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np_number): return value buf = io.BytesIO() pickle.dump(value, buf) @@ -106,7 +106,7 @@ def _ray_get_metric_value(value: Optional[Any]) -> Optional[Any]: return None if isinstance(value, str) and value.startswith(_RAY_METRIC_IS_DATAFRAME_PREFIX): return pd.DataFrame(json.loads(value.removeprefix(_RAY_METRIC_IS_DATAFRAME_PREFIX))) - if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np.number): + if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np_number): return value if not isinstance(value, io.BytesIO): raise ValueError(f"Expected metric value to be of type BytesIO, found type: {type_str(value)}")