Skip to content

Commit 2b36521

Browse files
authored
GH-36642: [Python][CI] Configure warnings as errors during pytest (#37018)
### Rationale for this change

Warnings are constantly being introduced into the pyarrow tests. Let's try enforcing them as errors in an effort to keep the codebase healthy.

### What changes are included in this PR?

* Fixed existing warnings
* Set warnings as errors in CI

### Are these changes tested?

Yes, ran pytests locally w/o warnings.

### Are there any user-facing changes?

No

* Closes: #36642

Authored-by: Dane Pitkin <dane@voltrondata.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
1 parent 3bb13da commit 2b36521

File tree

7 files changed

+34
-21
lines changed

7 files changed

+34
-21
lines changed

dev/tasks/tasks.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,7 @@ tasks:
12531253
params:
12541254
env:
12551255
PYTHON: "{{ python_version }}"
1256+
PYTEST_ARGS: "-W error"
12561257
image: conda-python
12571258
{% endfor %}
12581259

@@ -1265,7 +1266,7 @@ tasks:
12651266
HYPOTHESIS_PROFILE: ci
12661267
PYARROW_TEST_HYPOTHESIS: ON
12671268
# limit to execute hypothesis tests only
1268-
PYTEST_ARGS: "-m hypothesis"
1269+
PYTEST_ARGS: "-m hypothesis -W error"
12691270
image: conda-python-pandas
12701271

12711272
test-conda-python-3.10-substrait:

docker-compose.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,7 @@ services:
835835
shm_size: *shm-size
836836
environment:
837837
<<: [*common, *ccache, *sccache]
838+
PYTEST_ARGS: # inherit
838839
volumes: *conda-volumes
839840
command: &python-conda-command
840841
["

python/pyarrow/tests/parquet/conftest.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,26 @@ def datadir(base_datadir):
2929
def s3_bucket(s3_server):
3030
boto3 = pytest.importorskip('boto3')
3131
botocore = pytest.importorskip('botocore')
32+
s3_bucket_name = 'test-s3fs'
3233

3334
host, port, access_key, secret_key = s3_server['connection']
34-
s3 = boto3.resource(
35+
s3_client = boto3.client(
3536
's3',
3637
endpoint_url='http://{}:{}'.format(host, port),
3738
aws_access_key_id=access_key,
3839
aws_secret_access_key=secret_key,
3940
config=botocore.client.Config(signature_version='s3v4'),
4041
region_name='us-east-1'
4142
)
42-
bucket = s3.Bucket('test-s3fs')
43+
4344
try:
44-
bucket.create()
45+
s3_client.create_bucket(Bucket=s3_bucket_name)
4546
except Exception:
46-
# we get BucketAlreadyOwnedByYou error with fsspec handler
47-
pass
48-
return 'test-s3fs'
47+
pass # we get BucketAlreadyOwnedByYou error with fsspec handler
48+
finally:
49+
s3_client.close()
50+
51+
return s3_bucket_name
4952

5053

5154
@pytest.fixture

python/pyarrow/tests/parquet/test_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path,
13161316
output_df[col] = output_df[col].astype('category')
13171317

13181318
if schema:
1319-
expected_date_type = schema.field_by_name('date').type.to_pandas_dtype()
1319+
expected_date_type = schema.field('date').type.to_pandas_dtype()
13201320
output_df["date"] = output_df["date"].astype(expected_date_type)
13211321

13221322
tm.assert_frame_equal(output_df, input_df)

python/pyarrow/tests/strategies.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types):
182182

183183

184184
def dictionary_types(key_strategy=None, value_strategy=None):
185-
key_strategy = key_strategy or signed_integer_types
186-
value_strategy = value_strategy or st.one_of(
187-
bool_type,
188-
integer_types,
189-
st.sampled_from([pa.float32(), pa.float64()]),
190-
binary_type,
191-
string_type,
192-
fixed_size_binary_type,
193-
)
185+
if key_strategy is None:
186+
key_strategy = signed_integer_types
187+
if value_strategy is None:
188+
value_strategy = st.one_of(
189+
bool_type,
190+
integer_types,
191+
st.sampled_from([pa.float32(), pa.float64()]),
192+
binary_type,
193+
string_type,
194+
fixed_size_binary_type,
195+
)
194196
return st.builds(pa.dictionary, key_strategy, value_strategy)
195197

196198

@@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None):
368370
children = [draw(arrays(field.type, size=rows)) for field in schema]
369371
# TODO(kszucs): the names and schema arguments are not consistent with
370372
# Table.from_array's arguments
371-
return pa.RecordBatch.from_arrays(children, names=schema)
373+
return pa.RecordBatch.from_arrays(children, schema=schema)
372374

373375

374376
@st.composite

python/pyarrow/tests/test_pandas.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2913,7 +2913,10 @@ def test_strided_data_import(self):
29132913
'f4', 'f8']
29142914

29152915
for type_name in numeric_dtypes:
2916-
cases.append(random_numbers.astype(type_name))
2916+
# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
2917+
with warnings.catch_warnings():
2918+
warnings.simplefilter("ignore")
2919+
cases.append(random_numbers.astype(type_name))
29172920

29182921
# strings
29192922
cases.append(np.array([random_ascii(10) for i in range(N * K)],

python/pyarrow/tests/test_tensor.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import os
1919
import sys
2020
import pytest
21+
import warnings
2122
import weakref
2223

2324
import numpy as np
@@ -82,8 +83,10 @@ def test_tensor_base_object():
8283
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
8384
def test_tensor_numpy_roundtrip(dtype_str, arrow_type):
8485
dtype = np.dtype(dtype_str)
85-
data = (100 * np.random.randn(10, 4)).astype(dtype)
86-
86+
# Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning
87+
with warnings.catch_warnings():
88+
warnings.simplefilter("ignore")
89+
data = (100 * np.random.randn(10, 4)).astype(dtype)
8790
tensor = pa.Tensor.from_numpy(data)
8891
assert tensor.type == arrow_type
8992

0 commit comments

Comments (0)