From a1aca362f13dd7e91e2b649308c3fb4590d19ed3 Mon Sep 17 00:00:00 2001
From: Abhishek Divekar <adivekar@amazon.com>
Date: Thu, 13 Feb 2025 20:04:13 +0530
Subject: [PATCH] Made fmcore compatible with NumPy v1.20+ and v2.0+ for vLLM

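Replaced direct uses of `np.bool`, `np.integer`, `np.floating` and
`np.number` with the `np_bool`, `np_integer`, `np_floating` and `np_number`
aliases imported from `bears.util.language._import`.

Also improved messaging: the RayEvaluator progress bars now report their
unit as "batch" instead of "submissions".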
---
 src/fmcore/framework/_evaluator/RayEvaluator.py | 4 ++--
 src/fmcore/framework/_task/classification.py    | 3 ++-
 src/fmcore/framework/_trainer/RayTuneTrainer.py | 8 ++++----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/fmcore/framework/_evaluator/RayEvaluator.py b/src/fmcore/framework/_evaluator/RayEvaluator.py
index 7aa13d6..33ede37 100644
--- a/src/fmcore/framework/_evaluator/RayEvaluator.py
+++ b/src/fmcore/framework/_evaluator/RayEvaluator.py
@@ -649,7 +649,7 @@ def _run_evaluation(
                         progress_bar,
                         total=num_actors_created,
                         desc=f"Submitting {input_len_str} rows",
-                        unit="submissions",
+                        unit="batch",
                     )
                     ## Each actor streams data from Dask dataframe on the cluster:
                     if not isinstance(data, DaskScalableDataFrame):
@@ -693,7 +693,7 @@ def _run_evaluation(
                         progress_bar,
                         total=math.ceil(input_len / submission_batch_size),
                         desc=f"Submitting {input_len_str} rows",
-                        unit="submissions",
+                        unit="batch",
                     )
                     ## Initialize to zero:
                     rows_completed: int = ray.get(row_counter.get_rows_completed.remote())
diff --git a/src/fmcore/framework/_task/classification.py b/src/fmcore/framework/_task/classification.py
index b859f12..88dd84d 100644
--- a/src/fmcore/framework/_task/classification.py
+++ b/src/fmcore/framework/_task/classification.py
@@ -17,6 +17,7 @@
 from bears import ScalableDataFrame, ScalableSeries
 from bears.processor import EncodingRange, LabelEncoding
 from bears.util import all_are_np_subtypes, as_list, as_tuple, is_list_or_set_like, safe_validate_arguments
+from bears.util.language._import import np_bool, np_floating, np_integer
 from pydantic import constr
 
 from fmcore.constants import DataLayout, DataSplit, MLType, MLTypeSchema, Task
@@ -268,7 +269,7 @@ def from_top_k(
             labels: np.ndarray = np.array(labels)
         if isinstance(scores, (list, tuple)):
             scores: np.ndarray = np.array(scores)
-        if not all_are_np_subtypes(scores.dtype, {np.bool, np.integer, np.floating}):
+        if not all_are_np_subtypes(scores.dtype, {np_bool, np_integer, np_floating}):
             raise ValueError(
                 f"Expected scores array to have dtype as bool, int or float; found: {scores.dtype}"
             )
diff --git a/src/fmcore/framework/_trainer/RayTuneTrainer.py b/src/fmcore/framework/_trainer/RayTuneTrainer.py
index 07055c0..8ccfc65 100644
--- a/src/fmcore/framework/_trainer/RayTuneTrainer.py
+++ b/src/fmcore/framework/_trainer/RayTuneTrainer.py
@@ -29,8 +29,8 @@
     set_param_from_alias,
     type_str,
 )
-from bears.util.language._import import _IS_RAY_INSTALLED, _IS_TORCH_INSTALLED
-from pydantic import conint, model_validator, ConfigDict
+from bears.util.language._import import _IS_RAY_INSTALLED, _IS_TORCH_INSTALLED, np_number
+from pydantic import ConfigDict, conint, model_validator
 
 from fmcore.constants import DataLayout
 from fmcore.framework._algorithm import Algorithm
@@ -93,7 +93,7 @@ def _ray_put_metric_value(metric: Metric) -> Any:
         assert isinstance(metric.value, pd.DataFrame)
         return _RAY_METRIC_IS_DATAFRAME_PREFIX + metric.value.to_json(orient="records")
     value: Any = metric.value
-    if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np.number):
+    if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np_number):
         return value
     buf = io.BytesIO()
     pickle.dump(value, buf)
@@ -106,7 +106,7 @@ def _ray_get_metric_value(value: Optional[Any]) -> Optional[Any]:
         return None
     if isinstance(value, str) and value.startswith(_RAY_METRIC_IS_DATAFRAME_PREFIX):
         return pd.DataFrame(json.loads(value.removeprefix(_RAY_METRIC_IS_DATAFRAME_PREFIX)))
-    if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np.number):
+    if isinstance(value, (int, float, str)) or np.issubdtype(type(value), np_number):
         return value
     if not isinstance(value, io.BytesIO):
         raise ValueError(f"Expected metric value to be of type BytesIO, found type: {type_str(value)}")