diff --git a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_type_inference.py b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_type_inference.py index 7e60f552e9c7a..169ddd2c55f4d 100644 --- a/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_type_inference.py +++ b/python/pyspark/tests/upstream/pyarrow/test_pyarrow_array_type_inference.py @@ -299,6 +299,8 @@ def test_pandas_series_numpy_backed(self): # pandas >= 3 infers large_string instead of string for object-dtype string Series string_type = pa.large_string() if LooseVersion(pd.__version__) >= "3.0.0" else pa.string() + # pandas >= 3 defaults to microsecond resolution instead of nanosecond + ts_unit = "us" if LooseVersion(pd.__version__) >= "3.0.0" else "ns" sg = ZoneInfo("Asia/Singapore") la = "America/Los_Angeles" @@ -324,17 +326,17 @@ def test_pandas_series_numpy_backed(self): (pd.Series([True, False, True]), pa.bool_()), # Temporal (pd.Series([date1, date2]), pa.date32()), - (pd.Series(pd.to_datetime(["2024-01-01", "2024-01-02"])), pa.timestamp("ns")), - (pd.Series([pd.Timestamp("1970-01-01")]), pa.timestamp("ns")), - (pd.Series([pd.Timestamp.min]), pa.timestamp("ns")), - (pd.Series([pd.Timestamp.max]), pa.timestamp("ns")), - (pd.Series(pd.to_timedelta(["1 day", "2 hours"])), pa.duration("ns")), - (pd.Series([pd.Timedelta(0)]), pa.duration("ns")), - (pd.Series([pd.Timedelta.min]), pa.duration("ns")), - (pd.Series([pd.Timedelta.max]), pa.duration("ns")), + (pd.Series(pd.to_datetime(["2024-01-01", "2024-01-02"])), pa.timestamp(ts_unit)), + (pd.Series([pd.Timestamp("1970-01-01")]), pa.timestamp(ts_unit)), + (pd.Series([pd.Timestamp.min]), pa.timestamp(ts_unit)), + (pd.Series([pd.Timestamp.max]), pa.timestamp(ts_unit)), + (pd.Series(pd.to_timedelta(["1 day", "2 hours"])), pa.duration(ts_unit)), + (pd.Series([pd.Timedelta(0)]), pa.duration(ts_unit)), + (pd.Series([pd.Timedelta.min]), pa.duration(ts_unit)), + (pd.Series([pd.Timedelta.max]), pa.duration(ts_unit)), # Timezone-aware - (pd.Series([dt1_sg, dt2_sg]), pa.timestamp("ns", tz="Asia/Singapore")), - (pd.Series([ts1_la, ts2_la]), pa.timestamp("ns", tz=la)), + (pd.Series([dt1_sg, dt2_sg]), pa.timestamp(ts_unit, tz="Asia/Singapore")), + (pd.Series([ts1_la, ts2_la]), pa.timestamp(ts_unit, tz=la)), # Binary (pd.Series([b"hello", b"world"]), pa.binary()), # Nested