From d9d855bef35d047123169a5f7a34052f21739765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 15:58:57 +0200 Subject: [PATCH 1/9] Use pytest --- python/tests/generic.py | 54 ++++-- python/tests/test_df.py | 136 +++++++------- python/tests/test_sql.py | 385 ++++++++++++++------------------------ python/tests/test_udaf.py | 85 +++++---- 4 files changed, 288 insertions(+), 372 deletions(-) diff --git a/python/tests/generic.py b/python/tests/generic.py index 267d6f656ce0..06c646585c64 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,24 +16,31 @@ # under the License. import datetime -import numpy -import pyarrow +import os.path +import shutil + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +import datafusion # used to write parquet files -import pyarrow.parquet def data(): - data = numpy.concatenate( - [numpy.random.normal(0, 0.01, size=50), numpy.random.normal(50, 0.01, size=50)] - ) - return pyarrow.array(data) + np.random.seed(1) + data = np.concatenate([ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50) + ]) + return pa.array(data) def data_with_nans(): - data = numpy.random.normal(0, 0.01, size=50) - mask = numpy.random.randint(0, 2, size=50) - data[mask == 0] = numpy.NaN + np.random.seed(0) + data = np.random.normal(0, 0.01, size=50) + mask = np.random.randint(0, 2, size=50) + data[mask == 0] = np.NaN return data @@ -43,8 +50,19 @@ def data_datetime(f): datetime.datetime.now() - datetime.timedelta(days=1), datetime.datetime.now() + datetime.timedelta(days=1), ] - return pyarrow.array( - data, type=pyarrow.timestamp(f), mask=numpy.array([False, True, False]) + return pa.array( + data, type=pa.timestamp(f), mask=np.array([False, True, False]) + ) + + +def data_date32(): + data = [ + datetime.date(2000, 1, 1), + datetime.date(1980, 1, 1), + datetime.date(2030, 1, 1), + ] + return pa.array( + data, type=pa.date32(), mask=np.array([False, True, False]) ) @@ -54,16 +72,16 @@ def data_timedelta(f): datetime.timedelta(days=1), datetime.timedelta(seconds=1), ] - return pyarrow.array( - data, type=pyarrow.duration(f), mask=numpy.array([False, True, False]) + return pa.array( + data, type=pa.duration(f), mask=np.array([False, True, False]) ) def data_binary_other(): - return numpy.array([1, 0, 0], dtype="u4") + return np.array([1, 0, 0], dtype="u4") def write_parquet(path, data): - table = pyarrow.Table.from_arrays([data], names=["a"]) - pyarrow.parquet.write_table(table, path) - return path + table = pa.Table.from_arrays([data], names=["a"]) + pq.write_table(table, path) + return str(path) diff --git a/python/tests/test_df.py b/python/tests/test_df.py index fdafdfa7f509..e79b91ec503a 100644 --- a/python/tests/test_df.py +++ b/python/tests/test_df.py @@ -15,100 +15,104 @@ # specific language governing permissions and limitations # under the License. 
-import unittest +import pytest import pyarrow as pa -import datafusion +from datafusion import ExecutionContext, functions as f -f = datafusion.functions +@pytest.fixture +def df(): + ctx = ExecutionContext() -class TestCase(unittest.TestCase): - def _prepare(self): - ctx = datafusion.ExecutionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) + return ctx.create_dataframe([[batch]]) - def test_select(self): - df = self._prepare() - df = df.select( - f.col("a") + f.col("b"), - f.col("a") - f.col("b"), - ) +def test_select(df): + df = df.select( + f.col("a") + f.col("b"), + f.col("a") - f.col("b"), + ) - # execute and collect the first (and only) batch - result = df.collect()[0] + # execute and collect the first (and only) batch + result = df.collect()[0] - self.assertEqual(result.column(0), pa.array([5, 7, 9])) - self.assertEqual(result.column(1), pa.array([-3, -3, -3])) + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) - def test_filter(self): - df = self._prepare() - df = df.select( +def test_filter(df): + df = df \ + .select( f.col("a") + f.col("b"), f.col("a") - f.col("b"), - ).filter(f.col("a") > f.lit(2)) + ) \ + .filter(f.col("a") > f.lit(2)) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([9]) + assert result.column(1) == pa.array([-3]) + - # execute and collect the first (and only) batch - result = df.collect()[0] +def test_sort(df): + df = df.sort([ + f.col("b").sort(ascending=False) + ]) - self.assertEqual(result.column(0), pa.array([9])) - self.assertEqual(result.column(1), pa.array([-3])) + table = pa.Table.from_batches(df.collect()) + expected = {'a': [3, 2, 1], 'b': [6, 5, 4]} - def test_sort(self): - df = self._prepare() - df = df.sort([f.col("b").sort(ascending=False)]) + assert table.to_pydict() == expected - table = pa.Table.from_batches(df.collect()) - expected = {"a": [3, 2, 1], "b": [6, 5, 4]} - self.assertEqual(table.to_pydict(), expected) - def test_limit(self): - df = self._prepare() +def test_limit(df): + df = df.limit(1) - df = df.limit(1) + # execute and collect the first (and only) batch + result = df.collect()[0] - # execute and collect the first (and only) batch - result = df.collect()[0] + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 - self.assertEqual(len(result.column(0)), 1) - self.assertEqual(len(result.column(1)), 1) - def test_udf(self): - df = self._prepare() +def test_udf(df): + # is_null is a pa function over arrays + udf = f.udf(lambda x: x.is_null(), [pa.int64()], pa.bool_()) - # is_null is a pa function over arrays - udf = f.udf(lambda x: x.is_null(), [pa.int64()], pa.bool_()) + df = df.select(udf(f.col("a"))) + result = df.collect()[0].column(0) - df = df.select(udf(f.col("a"))) + assert result == pa.array([False, False, False]) - self.assertEqual(df.collect()[0].column(0), pa.array([False, False, False])) - def test_join(self): - ctx = datafusion.ExecutionContext() +def test_join(): + ctx = ExecutionContext() - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) + batch = 
pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]]) + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]]) - df = df.join(df1, on="a", how="inner") - df = df.sort([f.col("a").sort(ascending=True)]) - table = pa.Table.from_batches(df.collect()) + df = df.join(df1, on="a", how="inner") + df = df.sort([ + f.col("a").sort(ascending=True) + ]) + table = pa.Table.from_batches(df.collect()) - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - self.assertEqual(table.to_pydict(), expected) + expected = {'a': [1, 2], 'c': [8, 10], 'b': [4, 5]} + assert table.to_pydict() == expected diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 117284973fb7..15c8e43c4bd3 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,286 +15,181 @@ # specific language governing permissions and limitations # under the License. -import unittest import tempfile import datetime import os.path import shutil -import numpy -import pyarrow -import datafusion +import pytest +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq -# used to write parquet files -import pyarrow.parquet +from datafusion import ExecutionContext +import datafusion from tests.generic import * -class TestCase(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - numpy.random.seed(1) - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def test_no_table(self): - with self.assertRaises(Exception): - datafusion.Context().sql("SELECT a FROM b").collect() - - def test_register(self): - ctx = datafusion.ExecutionContext() - - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data()) +@pytest.fixture +def ctx(): + return ExecutionContext() - ctx.register_parquet("t", path) - self.assertEqual(ctx.tables(), {"t"}) +def test_no_table(ctx): + #TODO(kszucs): should raise a DataFusionError instead of plain Exeption + with pytest.raises(Exception, match="DataFusion error"): + ctx.sql("SELECT a FROM b").collect() - def test_execute(self): - data = [1, 1, 2, 2, 3, 11, 12] - ctx = datafusion.ExecutionContext() +def test_register(ctx, tmp_path): + path = write_parquet(tmp_path / "a.parquet", data()) + ctx.register_parquet("t", path) - # single column, "a" - path = write_parquet( - os.path.join(self.test_dir, "a.parquet"), pyarrow.array(data) - ) - ctx.register_parquet("t", path) - - self.assertEqual(ctx.tables(), {"t"}) - - # count - result = ctx.sql("SELECT COUNT(a) FROM t").collect() + assert ctx.tables() == {"t"} - expected = pyarrow.array([7], pyarrow.uint64()) - expected = [pyarrow.RecordBatch.from_arrays([expected], ["COUNT(a)"])] - self.assertEqual(expected, result) - # where - expected = pyarrow.array([2], pyarrow.uint64()) - expected = [pyarrow.RecordBatch.from_arrays([expected], ["COUNT(a)"])] - self.assertEqual( - expected, ctx.sql("SELECT COUNT(a) FROM t WHERE a > 10").collect() - ) +def test_execute(ctx, tmp_path): + data = [1, 1, 2, 2, 3, 11, 12] - # group by - results = ctx.sql( - "SELECT CAST(a as int), COUNT(a) FROM t GROUP BY CAST(a as int)" - ).collect() - - # group by returns batches - result_keys = [] - result_values = [] - for result in results: - pydict = 
result.to_pydict() - result_keys.extend(pydict["CAST(a AS Int32)"]) - result_values.extend(pydict["COUNT(a)"]) - - result_keys, result_values = ( - list(t) for t in zip(*sorted(zip(result_keys, result_values))) - ) + # single column, "a" + path = write_parquet(tmp_path / "a.parquet", pa.array(data)) + ctx.register_parquet("t", path) - self.assertEqual(result_keys, [1, 2, 3, 11, 12]) - self.assertEqual(result_values, [2, 2, 1, 1, 1]) - - # order by - result = ctx.sql( - "SELECT a, CAST(a AS int) FROM t ORDER BY a DESC LIMIT 2" - ).collect() - expected_a = pyarrow.array([50.0219, 50.0152], pyarrow.float64()) - expected_cast = pyarrow.array([50, 50], pyarrow.int32()) - expected = [ - pyarrow.RecordBatch.from_arrays( - [expected_a, expected_cast], ["a", "CAST(a AS Int32)"] - ) - ] - numpy.testing.assert_equal(expected[0].column(1), expected[0].column(1)) - - def test_cast(self): - """ - Verify that we can cast - """ - ctx = datafusion.ExecutionContext() - - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data()) - ctx.register_parquet("t", path) - - valid_types = [ - "smallint", - "int", - "bigint", - "float(32)", - "float(64)", - "float", - ] - - select = ", ".join( - [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)] - ) + assert ctx.tables() == {"t"} - # can execute, which implies that we can cast - ctx.sql(f"SELECT {select} FROM t").collect() + # count + result = ctx.sql("SELECT COUNT(a) FROM t").collect() - def _test_udf(self, udf, args, return_type, array, expected): - ctx = datafusion.ExecutionContext() + expected = pa.array([7], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["COUNT(a)"])] + assert result == expected - # write to disk - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), array) - ctx.register_parquet("t", path) + # where + expected = pa.array([2], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["COUNT(a)"])] + result = ctx.sql("SELECT COUNT(a) FROM t WHERE a > 10").collect() + assert result == expected - ctx.register_udf("udf", udf, args, return_type) + # group by + results = ctx.sql( + "SELECT CAST(a as int), COUNT(a) FROM t GROUP BY CAST(a as int)" + ).collect() - batches = ctx.sql("SELECT udf(a) AS tt FROM t").collect() + # group by returns batches + result_keys = [] + result_values = [] + for result in results: + pydict = result.to_pydict() + result_keys.extend(pydict["CAST(a AS Int32)"]) + result_values.extend(pydict["COUNT(a)"]) - result = batches[0].column(0) + result_keys, result_values = ( + list(t) for t in zip(*sorted(zip(result_keys, result_values))) + ) - self.assertEqual(expected, result) + assert result_keys == [1, 2, 3, 11, 12] + assert result_values == [2, 2, 1, 1, 1] - def test_udf_identity(self): - self._test_udf( - lambda x: x, - [pyarrow.float64()], - pyarrow.float64(), - pyarrow.array([-1.2, None, 1.2]), - pyarrow.array([-1.2, None, 1.2]), + # order by + result = ctx.sql( + "SELECT a, CAST(a AS int) FROM t ORDER BY a DESC LIMIT 2" + ).collect() + expected_a = pa.array([50.0219, 50.0152], pa.float64()) + expected_cast = pa.array([50, 50], pa.int32()) + expected = [ + pa.RecordBatch.from_arrays( + [expected_a, expected_cast], ["a", "CAST(a AS Int32)"] ) - - def test_udf(self): - self._test_udf( + ] + np.testing.assert_equal(expected[0].column(1), expected[0].column(1)) + + +def test_cast(ctx, tmp_path): + """ + Verify that we can cast + """ + path = write_parquet(tmp_path / "a.parquet", data()) + ctx.register_parquet("t", path) + + valid_types = [ + "smallint", + "int", + 
"bigint", + "float(32)", + "float(64)", + "float", + ] + + select = ", ".join( + [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)] + ) + + # can execute, which implies that we can cast + ctx.sql(f"SELECT {select} FROM t").collect() + + +@pytest.mark.parametrize( + ("fn", "input_types", "output_type", "input_values", "expected_values"), + [ + ( + lambda x: x, + [pa.float64()], + pa.float64(), + [-1.2, None, 1.2], + [-1.2, None, 1.2] + ), + ( lambda x: x.is_null(), - [pyarrow.float64()], - pyarrow.bool_(), - pyarrow.array([-1.2, None, 1.2]), - pyarrow.array([False, True, False]), + [pa.float64()], + pa.bool_(), + [-1.2, None, 1.2], + [False, True, False] ) + ] +) +def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected_values): + # write to disk + path = write_parquet(tmp_path / "a.parquet", pa.array(input_values)) + ctx.register_parquet("t", path) + ctx.register_udf("udf", fn, input_types, output_type) + batches = ctx.sql("SELECT udf(a) AS tt FROM t").collect() + result = batches[0].column(0) -class TestIO(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def _test_data(self, data): - ctx = datafusion.ExecutionContext() - - # write to disk - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data) - ctx.register_parquet("t", path) + assert result == pa.array(expected_values) - batches = ctx.sql("SELECT a AS tt FROM t").collect() - result = batches[0].column(0) - - numpy.testing.assert_equal(data, result) - - def test_nans(self): - self._test_data(data_with_nans()) - - def test_utf8(self): - array = pyarrow.array( - ["a", "b", "c"], pyarrow.utf8(), numpy.array([False, True, False]) - ) - self._test_data(array) - - def test_large_utf8(self): - array = pyarrow.array( - ["a", "b", "c"], pyarrow.large_utf8(), numpy.array([False, True, False]) - ) - self._test_data(array) +_null_mask = np.array([False, True, False]) - # Error from Arrow - @unittest.expectedFailure - def test_datetime_s(self): - self._test_data(data_datetime("s")) +@pytest.mark.parametrize('arr', [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + data_binary_other(), + data_date32(), + data_with_nans(), # C data interface missing - @unittest.expectedFailure - def test_datetime_ms(self): - self._test_data(data_datetime("ms")) - - # C data interface missing - @unittest.expectedFailure - def test_datetime_us(self): - self._test_data(data_datetime("us")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_datetime_ns(self): - self._test_data(data_datetime("ns")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_s(self): - self._test_data(data_timedelta("s")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_ms(self): - self._test_data(data_timedelta("ms")) - + pytest.param(pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), marks=pytest.mark.xfail), + pytest.param(data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(data_datetime("ns"), 
marks=pytest.mark.xfail), # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_us(self): - self._test_data(data_timedelta("us")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_ns(self): - self._test_data(data_timedelta("ns")) - - def test_date32(self): - array = pyarrow.array( - [ - datetime.date(2000, 1, 1), - datetime.date(1980, 1, 1), - datetime.date(2030, 1, 1), - ], - pyarrow.date32(), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_binary_variable(self): - array = pyarrow.array( - [b"1", b"2", b"3"], pyarrow.binary(), numpy.array([False, True, False]) - ) - self._test_data(array) - - # C data interface missing - @unittest.expectedFailure - def test_binary_fixed(self): - array = pyarrow.array( - [b"1111", b"2222", b"3333"], - pyarrow.binary(4), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_large_binary(self): - array = pyarrow.array( - [b"1111", b"2222", b"3333"], - pyarrow.large_binary(), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_binary_other(self): - self._test_data(data_binary_other()) - - def test_bool(self): - array = pyarrow.array( - [False, True, True], None, numpy.array([False, True, False]) - ) - self._test_data(array) - - def test_u32(self): - array = pyarrow.array([0, 1, 2], None, numpy.array([False, True, False])) - self._test_data(array) + pytest.param(data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("ns"), marks=pytest.mark.xfail), +]) +def test_simple_select(ctx, tmp_path, arr): + path = write_parquet(tmp_path / "a.parquet", arr) + ctx.register_parquet("t", path) + + batches = ctx.sql("SELECT a AS tt FROM t").collect() + result = batches[0].column(0) + + np.testing.assert_equal(result, arr) diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index e1e4f933a9b4..2a69bcee4b87 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -import unittest -import pyarrow -import pyarrow.compute -import datafusion +import pytest -f = datafusion.functions +import pyarrow as pa +import pyarrow.compute as pc + +from datafusion import ExecutionContext, functions as f class Accumulator: @@ -29,63 +29,62 @@ class Accumulator: """ def __init__(self): - self._sum = pyarrow.scalar(0.0) + self._sum = pa.scalar(0.0) - def to_scalars(self) -> [pyarrow.Scalar]: + def to_scalars(self) -> [pa.Scalar]: return [self._sum] - def update(self, values: pyarrow.Array) -> None: + def update(self, values: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(values).as_py() + self._sum = pa.scalar( + self._sum.as_py() + pc.sum(values).as_py() ) - def merge(self, states: pyarrow.Array) -> None: + def merge(self, states: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(states).as_py() + self._sum = pa.scalar( + self._sum.as_py() + pc.sum(states).as_py() ) - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: return self._sum -class TestCase(unittest.TestCase): - def _prepare(self): - ctx = datafusion.ExecutionContext() - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 4, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) +@pytest.fixture +def df(): + ctx = ExecutionContext() - def test_aggregate(self): - df = self._prepare() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) - udaf = f.udaf( - Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()] - ) - df = df.aggregate([], [udaf(f.col("a"))]) +def test_aggregate(df): + udaf = f.udaf( + Accumulator, pa.float64(), pa.float64(), [pa.float64()] + ) - # execute and collect the first (and only) batch - result = df.collect()[0] + df = df.aggregate([], [udaf(f.col("a"))]) - self.assertEqual(result.column(0), pyarrow.array([1.0 + 2.0 + 3.0])) + # execute and collect the first (and only) batch + result = df.collect()[0] - def test_group_by(self): - df = self._prepare() + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) - udaf = f.udaf( - Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()] - ) - df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) +def test_group_by(df): + udaf = f.udaf( + Accumulator, pa.float64(), pa.float64(), [pa.float64()] + ) + + df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) + + batches = df.collect() + arrays = [batch.column(1) for batch in batches] + joined = pa.concat_arrays(arrays) + assert joined == pa.array([1.0 + 2.0, 3.0])) - # execute and collect the first (and only) batch - batches = df.collect() - arrays = [batch.column(1) for batch in batches] - joined = pyarrow.concat_arrays(arrays) - self.assertEqual(joined, pyarrow.array([1.0 + 2.0, 3.0])) From 68c456da4585b8a5b3f50698db8492f94c371078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 16:03:58 +0200 Subject: [PATCH 2/9] Formatting --- python/tests/generic.py | 13 +++--- python/tests/test_df.py | 28 +++++-------- python/tests/test_sql.py | 88 ++++++++++++++++++++------------------- python/tests/test_udaf.py | 30 +++++-------- 4 files changed, 73 insertions(+), 86 deletions(-) diff --git a/python/tests/generic.py b/python/tests/generic.py index 06c646585c64..e61542e6ab37 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,23 +16,22 @@ # under the License. import datetime -import os.path -import shutil import numpy as np import pyarrow as pa import pyarrow.parquet as pq -import datafusion # used to write parquet files def data(): np.random.seed(1) - data = np.concatenate([ - np.random.normal(0, 0.01, size=50), - np.random.normal(50, 0.01, size=50) - ]) + data = np.concatenate( + [ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50), + ] + ) return pa.array(data) diff --git a/python/tests/test_df.py b/python/tests/test_df.py index e79b91ec503a..5b6cbddbd74b 100644 --- a/python/tests/test_df.py +++ b/python/tests/test_df.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. 
-import pytest - import pyarrow as pa -from datafusion import ExecutionContext, functions as f +import pytest +from datafusion import ExecutionContext +from datafusion import functions as f @pytest.fixture @@ -48,12 +48,10 @@ def test_select(df): def test_filter(df): - df = df \ - .select( - f.col("a") + f.col("b"), - f.col("a") - f.col("b"), - ) \ - .filter(f.col("a") > f.lit(2)) + df = df.select( + f.col("a") + f.col("b"), + f.col("a") - f.col("b"), + ).filter(f.col("a") > f.lit(2)) # execute and collect the first (and only) batch result = df.collect()[0] @@ -63,12 +61,10 @@ def test_filter(df): def test_sort(df): - df = df.sort([ - f.col("b").sort(ascending=False) - ]) + df = df.sort([f.col("b").sort(ascending=False)]) table = pa.Table.from_batches(df.collect()) - expected = {'a': [3, 2, 1], 'b': [6, 5, 4]} + expected = {"a": [3, 2, 1], "b": [6, 5, 4]} assert table.to_pydict() == expected @@ -109,10 +105,8 @@ def test_join(): df1 = ctx.create_dataframe([[batch]]) df = df.join(df1, on="a", how="inner") - df = df.sort([ - f.col("a").sort(ascending=True) - ]) + df = df.sort([f.col("a").sort(ascending=True)]) table = pa.Table.from_batches(df.collect()) - expected = {'a': [1, 2], 'c': [8, 10], 'b': [4, 5]} + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} assert table.to_pydict() == expected diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 15c8e43c4bd3..4eaf2b259d7c 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,20 +15,12 @@ # specific language governing permissions and limitations # under the License. -import tempfile -import datetime -import os.path -import shutil - -import pytest import numpy as np import pyarrow as pa -import pyarrow.parquet as pq - +import pytest from datafusion import ExecutionContext -import datafusion -from tests.generic import * +from . 
import generic as helpers @pytest.fixture @@ -37,13 +29,13 @@ def ctx(): def test_no_table(ctx): - #TODO(kszucs): should raise a DataFusionError instead of plain Exeption + # TODO(kszucs): should raise a DataFusionError instead of plain Exeption with pytest.raises(Exception, match="DataFusion error"): ctx.sql("SELECT a FROM b").collect() def test_register(ctx, tmp_path): - path = write_parquet(tmp_path / "a.parquet", data()) + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) assert ctx.tables() == {"t"} @@ -53,7 +45,7 @@ def test_execute(ctx, tmp_path): data = [1, 1, 2, 2, 3, 11, 12] # single column, "a" - path = write_parquet(tmp_path / "a.parquet", pa.array(data)) + path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) ctx.register_parquet("t", path) assert ctx.tables() == {"t"} @@ -109,7 +101,7 @@ def test_cast(ctx, tmp_path): """ Verify that we can cast """ - path = write_parquet(tmp_path / "a.parquet", data()) + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) valid_types = [ @@ -137,20 +129,24 @@ def test_cast(ctx, tmp_path): [pa.float64()], pa.float64(), [-1.2, None, 1.2], - [-1.2, None, 1.2] + [-1.2, None, 1.2], ), ( lambda x: x.is_null(), [pa.float64()], pa.bool_(), [-1.2, None, 1.2], - [False, True, False] - ) - ] + [False, True, False], + ), + ], ) -def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected_values): +def test_udf( + ctx, tmp_path, fn, input_types, output_type, input_values, expected_values +): # write to disk - path = write_parquet(tmp_path / "a.parquet", pa.array(input_values)) + path = helpers.write_parquet( + tmp_path / "a.parquet", pa.array(input_values) + ) ctx.register_parquet("t", path) ctx.register_udf("udf", fn, input_types, output_type) @@ -163,30 +159,36 @@ def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected _null_mask = np.array([False, True, False]) -@pytest.mark.parametrize('arr', [ - pa.array(["a", "b", "c"], pa.utf8(), _null_mask), - pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), - pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), - pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), - pa.array([False, True, True], None, _null_mask), - pa.array([0, 1, 2], None), - data_binary_other(), - data_date32(), - data_with_nans(), - # C data interface missing - pytest.param(pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), marks=pytest.mark.xfail), - pytest.param(data_datetime("s"), marks=pytest.mark.xfail), - pytest.param(data_datetime("ms"), marks=pytest.mark.xfail), - pytest.param(data_datetime("us"), marks=pytest.mark.xfail), - pytest.param(data_datetime("ns"), marks=pytest.mark.xfail), - # Not writtable to parquet - pytest.param(data_timedelta("s"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("ms"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("us"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("ns"), marks=pytest.mark.xfail), -]) +@pytest.mark.parametrize( + "arr", + [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + helpers.data_binary_other(), + helpers.data_date32(), + helpers.data_with_nans(), + # C data interface missing + 
pytest.param( + pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), + marks=pytest.mark.xfail, + ), + pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), + # Not writtable to parquet + pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), + ], +) def test_simple_select(ctx, tmp_path, arr): - path = write_parquet(tmp_path / "a.parquet", arr) + path = helpers.write_parquet(tmp_path / "a.parquet", arr) ctx.register_parquet("t", path) batches = ctx.sql("SELECT a AS tt FROM t").collect() diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 2a69bcee4b87..98ef95e12ace 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,12 +15,11 @@ # specific language governing permissions and limitations # under the License. -import pytest - import pyarrow as pa import pyarrow.compute as pc - -from datafusion import ExecutionContext, functions as f +import pytest +from datafusion import ExecutionContext +from datafusion import functions as f class Accumulator: @@ -35,22 +34,19 @@ def to_scalars(self) -> [pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar( - self._sum.as_py() + pc.sum(values).as_py() - ) + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) def merge(self, states: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar( - self._sum.as_py() + pc.sum(states).as_py() - ) + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py()) def evaluate(self) -> pa.Scalar: return self._sum - @pytest.fixture def df(): ctx = ExecutionContext() @@ -64,9 +60,7 @@ def df(): def test_aggregate(df): - udaf = f.udaf( - Accumulator, pa.float64(), pa.float64(), [pa.float64()] - ) + udaf = f.udaf(Accumulator, pa.float64(), pa.float64(), [pa.float64()]) df = df.aggregate([], [udaf(f.col("a"))]) @@ -77,9 +71,7 @@ def test_aggregate(df): def test_group_by(df): - udaf = f.udaf( - Accumulator, pa.float64(), pa.float64(), [pa.float64()] - ) + udaf = f.udaf(Accumulator, pa.float64(), pa.float64(), [pa.float64()]) df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) From 4a4dd9edc8bf63bb0824063d47d04dfc4ebbf001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 16:09:04 +0200 Subject: [PATCH 3/9] Update GHA conf --- .github/workflows/python_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index 13516ff699da..f072363d0a8f 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,10 +50,10 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements.txt + pip install -r requirements.txt pytest maturin develop - python -m unittest discover tests + pytest -v . 
env: CARGO_HOME: "/home/runner/.cargo" CARGO_TARGET_DIR: "/home/runner/target" From 2f9df4e9d6a7c82161fe8ba7a53044a33eee9a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 17:07:52 +0200 Subject: [PATCH 4/9] Remove TODO note --- python/tests/test_sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 4eaf2b259d7c..361526d06970 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -29,7 +29,6 @@ def ctx(): def test_no_table(ctx): - # TODO(kszucs): should raise a DataFusionError instead of plain Exeption with pytest.raises(Exception, match="DataFusion error"): ctx.sql("SELECT a FROM b").collect() From f5b44cab0606b11747e3a975a33ea94dbdcd3c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 10:34:02 +0200 Subject: [PATCH 5/9] Format --- python/tests/test_udaf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 98ef95e12ace..b24c08dbc867 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -78,5 +78,4 @@ def test_group_by(df): batches = df.collect() arrays = [batch.column(1) for batch in batches] joined = pa.concat_arrays(arrays) - assert joined == pa.array([1.0 + 2.0, 3.0])) - + assert joined == pa.array([1.0 + 2.0, 3.0]) From e33212fd276344f66beb6ff2012d8564675eb330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 10:55:44 +0200 Subject: [PATCH 6/9] Test requirements file --- dev/release/rat_exclude_files.txt | 1 + python/requirements-test.in | 18 +++++ python/requirements-test.txt | 107 ++++++++++++++++++++++++++++++ python/requirements.txt | 16 ----- 4 files changed, 126 insertions(+), 16 deletions(-) create mode 100644 python/requirements-test.in create mode 100644 python/requirements-test.txt diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 6126699bbc1f..96beccd0af81 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -105,3 +105,4 @@ benchmarks/queries/q*.sql ballista/rust/scheduler/testdata/* ballista/ui/scheduler/yarn.lock python/rust-toolchain +python/requirements*.txt diff --git a/python/requirements-test.in b/python/requirements-test.in new file mode 100644 index 000000000000..95b4932fe20b --- /dev/null +++ b/python/requirements-test.in @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+-r requirements.in +pytest diff --git a/python/requirements-test.txt b/python/requirements-test.txt new file mode 100644 index 000000000000..b5dfb8a8c3c3 --- /dev/null +++ b/python/requirements-test.txt @@ -0,0 +1,107 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes requirements-test.in +# +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +maturin==0.10.6 \ + --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ + --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ + --hash=sha256:4177a223727a0ad57bc3f69ca4c3bc04bb3cc4da787cc59a8e25808c85685c67 \ + --hash=sha256:4eb4481b6c7d6cac043b969d2eb993c982523e91bb2709f0b09e231cf4846731 \ + --hash=sha256:532625f312185b06ec196fdb0fc79efafc0e98768153d226fb9417c0ca85e410 \ + --hash=sha256:53ef64a147f8a5241a3e932f2db22b5ae7dc5892dae994da319446c5db89dc94 \ + --hash=sha256:a04589da42f62b1d515f35c81274a56fe0d29216894525e8a37fd1e3c69d87b1 \ + --hash=sha256:b58e9e2ba5a3f651d8885c41370a00bb1d3e4d7313cbb63354077153be7650f4 \ + --hash=sha256:bd39f7e08eb9908d4fe1cd9b3c953fad5b1fb4fec9c82d14c2973a65751e1899 \ + --hash=sha256:d63f2a15f0b8db4e70d9a59766ca240b2c2ee2146ed5e4385a6118d941d68b25 \ + --hash=sha256:fa7e1cea2a768257a33aeb556fdec5fc36011bfe82d96730117433c635629dd8 + # via -r requirements.in +numpy==1.20.3 \ + --hash=sha256:1676b0a292dd3c99e49305a16d7a9f42a4ab60ec522eac0d3dd20cdf362ac010 \ + --hash=sha256:16f221035e8bd19b9dc9a57159e38d2dd060b48e93e1d843c49cb370b0f415fd \ + --hash=sha256:43909c8bb289c382170e0282158a38cf306a8ad2ff6dfadc447e90f9961bef43 \ + --hash=sha256:4e465afc3b96dbc80cf4a5273e5e2b1e3451286361b4af70ce1adb2984d392f9 \ + --hash=sha256:55b745fca0a5ab738647d0e4db099bd0a23279c32b31a783ad2ccea729e632df \ + --hash=sha256:5d050e1e4bc9ddb8656d7b4f414557720ddcca23a5b88dd7cff65e847864c400 \ + --hash=sha256:637d827248f447e63585ca3f4a7d2dfaa882e094df6cfa177cc9cf9cd6cdf6d2 \ + --hash=sha256:6690080810f77485667bfbff4f69d717c3be25e5b11bb2073e76bb3f578d99b4 \ + --hash=sha256:66fbc6fed94a13b9801fb70b96ff30605ab0a123e775a5e7a26938b717c5d71a \ + --hash=sha256:67d44acb72c31a97a3d5d33d103ab06d8ac20770e1c5ad81bdb3f0c086a56cf6 \ + --hash=sha256:6ca2b85a5997dabc38301a22ee43c82adcb53ff660b89ee88dded6b33687e1d8 \ + --hash=sha256:6e51534e78d14b4a009a062641f465cfaba4fdcb046c3ac0b1f61dd97c861b1b \ + --hash=sha256:70eb5808127284c4e5c9e836208e09d685a7978b6a216db85960b1a112eeace8 \ + --hash=sha256:830b044f4e64a76ba71448fce6e604c0fc47a0e54d8f6467be23749ac2cbd2fb \ + --hash=sha256:8b7bb4b9280da3b2856cb1fc425932f46fba609819ee1c62256f61799e6a51d2 \ + --hash=sha256:a9c65473ebc342715cb2d7926ff1e202c26376c0dcaaee85a1fd4b8d8c1d3b2f \ + --hash=sha256:c1c09247ccea742525bdb5f4b5ceeacb34f95731647fe55774aa36557dbb5fa4 \ + --hash=sha256:c5bf0e132acf7557fc9bb8ded8b53bbbbea8892f3c9a1738205878ca9434206a \ + --hash=sha256:db250fd3e90117e0312b611574cd1b3f78bec046783195075cbd7ba9c3d73f16 \ + --hash=sha256:e515c9a93aebe27166ec9593411c58494fa98e5fcc219e47260d9ab8a1cc7f9f \ + --hash=sha256:e55185e51b18d788e49fe8305fd73ef4470596b33fc2c1ceb304566b99c71a69 \ + --hash=sha256:ea9cff01e75a956dbee133fa8e5b68f2f92175233de2f88de3a682dd94deda65 \ + 
--hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ + --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 + # via pyarrow +packaging==20.9 \ + --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ + --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a + # via pytest +pluggy==0.13.1 \ + --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ + --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest +pyarrow==4.0.1 \ + --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ + --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ + --hash=sha256:11517f0b4f4acbab0c37c674b4d1aad3c3dfea0f6b1bb322e921555258101ab3 \ + --hash=sha256:150db335143edd00d3ec669c7c8167d401c4aa0a290749351c80bbf146892b2e \ + --hash=sha256:24040a20208e9b16ba7b284624ebfe67e40f5c40b5dc8d874da322ac0053f9d3 \ + --hash=sha256:33c457728a1ce825b80aa8c8ed573709f1efe72003d45fa6fdbb444de9cc0b74 \ + --hash=sha256:423cd6a14810f4e40cb76e13d4240040fc1594d69fe1c4f2c70be00ad512ade5 \ + --hash=sha256:5387db80c6a7b5598884bf4df3fc546b3373771ad614548b782e840b71704877 \ + --hash=sha256:5a76ec44af838862b23fb5cfc48765bc7978f7b58a181c96ad92856280de548b \ + --hash=sha256:5f2660f59dfcfd34adac7c08dc7f615920de703f191066ed6277628975f06878 \ + --hash=sha256:6b7bd8f5aa327cc32a1b9b02a76502851575f5edb110f93c59a45c70211a5618 \ + --hash=sha256:72cf3477538bd8504f14d6299a387cc335444f7a188f548096dfea9533551f02 \ + --hash=sha256:76b75a9cfc572e890a1e000fd532bdd2084ec3f1ee94ee51802a477913a21072 \ + --hash=sha256:a81adbfbe2f6528d4593b5a8962b2751838517401d14e9d4cab6787478802693 \ + --hash=sha256:a968375c66e505f72b421f5864a37f51aad5da61b6396fa283f956e9f2b2b923 \ + --hash=sha256:afd4f7c0a225a326d2c0039cdc8631b5e8be30f78f6b7a3e5ce741cf5dd81c72 \ + --hash=sha256:b05bdd513f045d43228247ef4d9269c88139788e2d566f4cb3e855e282ad0330 \ + --hash=sha256:c2733c9bcd00074ce5497dd0a7b8a10c91d3395ddce322d7021c7fdc4ea6f610 \ + --hash=sha256:d0f080b2d9720bec42624cb0df66f60ae66b84a2ccd1fe2c291322df915ac9db \ + --hash=sha256:dcd20ee0240a88772eeb5691102c276f5cdec79527fb3a0679af7f93f93cb4bd \ + --hash=sha256:e1351576877764fb4d5690e4721ce902e987c85f4ab081c70a34e1d24646586e \ + --hash=sha256:e44dfd7e61c9eb6dda59bc49ad69e77945f6d049185a517c130417e3ca0494d8 \ + --hash=sha256:ee3d87615876550fee9a523307dd4b00f0f44cf47a94a32a07793da307df31a0 \ + --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ + --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 + # via -r requirements.in +pyparsing==2.4.7 \ + --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ + --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b + # via packaging +pytest==6.2.4 \ + --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ + --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 + # via -r requirements-test.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via + # -r requirements.in + # maturin + # pytest diff --git 
a/python/requirements.txt b/python/requirements.txt index ff02b80cf6fc..635eb2278482 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,19 +1,3 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. # # This file is autogenerated by pip-compile # To update, run: From 8459db30903d4477ce348daa4e5ecb59c908a6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 11:05:46 +0200 Subject: [PATCH 7/9] Update workflow file --- .github/workflows/python_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index f072363d0a8f..41842bbfef93 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,7 +50,7 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements.txt pytest + pip install -r requirements-test.txt maturin develop pytest -v . From 1bb075c745cd844d02c7c5dbd7f2cdd1a7b5c544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 12:37:21 +0200 Subject: [PATCH 8/9] Merge requirements file --- python/requirements-test.in | 18 ------ python/requirements-test.txt | 107 ----------------------------------- python/requirements.in | 1 + python/requirements.txt | 31 +++++++++- 4 files changed, 31 insertions(+), 126 deletions(-) delete mode 100644 python/requirements-test.in delete mode 100644 python/requirements-test.txt diff --git a/python/requirements-test.in b/python/requirements-test.in deleted file mode 100644 index 95b4932fe20b..000000000000 --- a/python/requirements-test.in +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
--r requirements.in -pytest diff --git a/python/requirements-test.txt b/python/requirements-test.txt deleted file mode 100644 index b5dfb8a8c3c3..000000000000 --- a/python/requirements-test.txt +++ /dev/null @@ -1,107 +0,0 @@ -# -# This file is autogenerated by pip-compile -# To update, run: -# -# pip-compile --generate-hashes requirements-test.in -# -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb - # via pytest -iniconfig==1.1.1 \ - --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ - --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 - # via pytest -maturin==0.10.6 \ - --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ - --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ - --hash=sha256:4177a223727a0ad57bc3f69ca4c3bc04bb3cc4da787cc59a8e25808c85685c67 \ - --hash=sha256:4eb4481b6c7d6cac043b969d2eb993c982523e91bb2709f0b09e231cf4846731 \ - --hash=sha256:532625f312185b06ec196fdb0fc79efafc0e98768153d226fb9417c0ca85e410 \ - --hash=sha256:53ef64a147f8a5241a3e932f2db22b5ae7dc5892dae994da319446c5db89dc94 \ - --hash=sha256:a04589da42f62b1d515f35c81274a56fe0d29216894525e8a37fd1e3c69d87b1 \ - --hash=sha256:b58e9e2ba5a3f651d8885c41370a00bb1d3e4d7313cbb63354077153be7650f4 \ - --hash=sha256:bd39f7e08eb9908d4fe1cd9b3c953fad5b1fb4fec9c82d14c2973a65751e1899 \ - --hash=sha256:d63f2a15f0b8db4e70d9a59766ca240b2c2ee2146ed5e4385a6118d941d68b25 \ - --hash=sha256:fa7e1cea2a768257a33aeb556fdec5fc36011bfe82d96730117433c635629dd8 - # via -r requirements.in -numpy==1.20.3 \ - --hash=sha256:1676b0a292dd3c99e49305a16d7a9f42a4ab60ec522eac0d3dd20cdf362ac010 \ - --hash=sha256:16f221035e8bd19b9dc9a57159e38d2dd060b48e93e1d843c49cb370b0f415fd \ - --hash=sha256:43909c8bb289c382170e0282158a38cf306a8ad2ff6dfadc447e90f9961bef43 \ - --hash=sha256:4e465afc3b96dbc80cf4a5273e5e2b1e3451286361b4af70ce1adb2984d392f9 \ - --hash=sha256:55b745fca0a5ab738647d0e4db099bd0a23279c32b31a783ad2ccea729e632df \ - --hash=sha256:5d050e1e4bc9ddb8656d7b4f414557720ddcca23a5b88dd7cff65e847864c400 \ - --hash=sha256:637d827248f447e63585ca3f4a7d2dfaa882e094df6cfa177cc9cf9cd6cdf6d2 \ - --hash=sha256:6690080810f77485667bfbff4f69d717c3be25e5b11bb2073e76bb3f578d99b4 \ - --hash=sha256:66fbc6fed94a13b9801fb70b96ff30605ab0a123e775a5e7a26938b717c5d71a \ - --hash=sha256:67d44acb72c31a97a3d5d33d103ab06d8ac20770e1c5ad81bdb3f0c086a56cf6 \ - --hash=sha256:6ca2b85a5997dabc38301a22ee43c82adcb53ff660b89ee88dded6b33687e1d8 \ - --hash=sha256:6e51534e78d14b4a009a062641f465cfaba4fdcb046c3ac0b1f61dd97c861b1b \ - --hash=sha256:70eb5808127284c4e5c9e836208e09d685a7978b6a216db85960b1a112eeace8 \ - --hash=sha256:830b044f4e64a76ba71448fce6e604c0fc47a0e54d8f6467be23749ac2cbd2fb \ - --hash=sha256:8b7bb4b9280da3b2856cb1fc425932f46fba609819ee1c62256f61799e6a51d2 \ - --hash=sha256:a9c65473ebc342715cb2d7926ff1e202c26376c0dcaaee85a1fd4b8d8c1d3b2f \ - --hash=sha256:c1c09247ccea742525bdb5f4b5ceeacb34f95731647fe55774aa36557dbb5fa4 \ - --hash=sha256:c5bf0e132acf7557fc9bb8ded8b53bbbbea8892f3c9a1738205878ca9434206a \ - --hash=sha256:db250fd3e90117e0312b611574cd1b3f78bec046783195075cbd7ba9c3d73f16 \ - --hash=sha256:e515c9a93aebe27166ec9593411c58494fa98e5fcc219e47260d9ab8a1cc7f9f \ - --hash=sha256:e55185e51b18d788e49fe8305fd73ef4470596b33fc2c1ceb304566b99c71a69 \ - --hash=sha256:ea9cff01e75a956dbee133fa8e5b68f2f92175233de2f88de3a682dd94deda65 \ - 
--hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ - --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 - # via pyarrow -packaging==20.9 \ - --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ - --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a - # via pytest -pluggy==0.13.1 \ - --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ - --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d - # via pytest -py==1.10.0 \ - --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ - --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a - # via pytest -pyarrow==4.0.1 \ - --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ - --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ - --hash=sha256:11517f0b4f4acbab0c37c674b4d1aad3c3dfea0f6b1bb322e921555258101ab3 \ - --hash=sha256:150db335143edd00d3ec669c7c8167d401c4aa0a290749351c80bbf146892b2e \ - --hash=sha256:24040a20208e9b16ba7b284624ebfe67e40f5c40b5dc8d874da322ac0053f9d3 \ - --hash=sha256:33c457728a1ce825b80aa8c8ed573709f1efe72003d45fa6fdbb444de9cc0b74 \ - --hash=sha256:423cd6a14810f4e40cb76e13d4240040fc1594d69fe1c4f2c70be00ad512ade5 \ - --hash=sha256:5387db80c6a7b5598884bf4df3fc546b3373771ad614548b782e840b71704877 \ - --hash=sha256:5a76ec44af838862b23fb5cfc48765bc7978f7b58a181c96ad92856280de548b \ - --hash=sha256:5f2660f59dfcfd34adac7c08dc7f615920de703f191066ed6277628975f06878 \ - --hash=sha256:6b7bd8f5aa327cc32a1b9b02a76502851575f5edb110f93c59a45c70211a5618 \ - --hash=sha256:72cf3477538bd8504f14d6299a387cc335444f7a188f548096dfea9533551f02 \ - --hash=sha256:76b75a9cfc572e890a1e000fd532bdd2084ec3f1ee94ee51802a477913a21072 \ - --hash=sha256:a81adbfbe2f6528d4593b5a8962b2751838517401d14e9d4cab6787478802693 \ - --hash=sha256:a968375c66e505f72b421f5864a37f51aad5da61b6396fa283f956e9f2b2b923 \ - --hash=sha256:afd4f7c0a225a326d2c0039cdc8631b5e8be30f78f6b7a3e5ce741cf5dd81c72 \ - --hash=sha256:b05bdd513f045d43228247ef4d9269c88139788e2d566f4cb3e855e282ad0330 \ - --hash=sha256:c2733c9bcd00074ce5497dd0a7b8a10c91d3395ddce322d7021c7fdc4ea6f610 \ - --hash=sha256:d0f080b2d9720bec42624cb0df66f60ae66b84a2ccd1fe2c291322df915ac9db \ - --hash=sha256:dcd20ee0240a88772eeb5691102c276f5cdec79527fb3a0679af7f93f93cb4bd \ - --hash=sha256:e1351576877764fb4d5690e4721ce902e987c85f4ab081c70a34e1d24646586e \ - --hash=sha256:e44dfd7e61c9eb6dda59bc49ad69e77945f6d049185a517c130417e3ca0494d8 \ - --hash=sha256:ee3d87615876550fee9a523307dd4b00f0f44cf47a94a32a07793da307df31a0 \ - --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ - --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 - # via -r requirements.in -pyparsing==2.4.7 \ - --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ - --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b - # via packaging -pytest==6.2.4 \ - --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ - --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 - # via -r requirements-test.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via - # -r requirements.in - # maturin - # pytest diff --git 
a/python/requirements.in b/python/requirements.in index 3ef9f18966d4..4ff7f4ee618b 100644 --- a/python/requirements.in +++ b/python/requirements.in @@ -17,3 +17,4 @@ maturin toml pyarrow +pytest diff --git a/python/requirements.txt b/python/requirements.txt index 635eb2278482..f7ede1ebd58e 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -2,8 +2,16 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile --generate-hashes +# pip-compile --generate-hashes requirements.in # +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest maturin==0.10.6 \ --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ @@ -43,6 +51,18 @@ numpy==1.20.3 \ --hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 # via pyarrow +packaging==20.9 \ + --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ + --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a + # via pytest +pluggy==0.13.1 \ + --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ + --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest pyarrow==4.0.1 \ --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ @@ -70,9 +90,18 @@ pyarrow==4.0.1 \ --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 # via -r requirements.in +pyparsing==2.4.7 \ + --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ + --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b + # via packaging +pytest==6.2.4 \ + --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ + --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 + # via -r requirements.in toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f # via # -r requirements.in # maturin + # pytest From 00ce946961832f5ba1d83e066f5d5c7461d82505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 13:14:51 +0200 Subject: [PATCH 9/9] Update workflow file --- .github/workflows/python_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index 41842bbfef93..e689396b5dcd 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,7 +50,7 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements-test.txt + pip install -r 
requirements.txt maturin develop pytest -v .
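
A closing note for reviewers: the conversion above rests on three pytest
features that replace the unittest scaffolding. Fixtures stand in for
setUp()/self._prepare(), @pytest.mark.parametrize with
pytest.param(..., marks=pytest.mark.xfail) stands in for one test method
per input type decorated with @unittest.expectedFailure, and the built-in
tmp_path fixture stands in for the tempfile.mkdtemp()/shutil.rmtree()
pair. For anyone who has not used these, below is a minimal
self-contained sketch of the same patterns; every name in it (numbers,
test_square, and so on) is illustrative and appears nowhere in the
patches.

    import pytest


    @pytest.fixture
    def numbers():
        # Plays the role of setUp()/self._prepare(): every test that
        # names this fixture as an argument receives a fresh copy.
        return [1, 2, 3]


    def test_sum(numbers):
        assert sum(numbers) == 6


    # One parametrized test replaces a family of near-identical test
    # methods; known failures stay visible in the list via xfail marks
    # instead of being deleted.
    @pytest.mark.parametrize(
        ("value", "squared"),
        [
            (2, 4),
            (3, 9),
            pytest.param(3, 10, marks=pytest.mark.xfail),
        ],
    )
    def test_square(value, squared):
        assert value**2 == squared


    def test_write(tmp_path):
        # tmp_path replaces the tempfile.mkdtemp()/shutil.rmtree()
        # pair: pytest creates the directory and cleans it up itself.
        path = tmp_path / "out.txt"
        path.write_text("hello")
        assert path.read_text() == "hello"

Running `pytest -v .` as the updated workflow does reports the marked
case as XFAIL rather than failing the run, which is how the known
type-support gaps (fixed-size binary, timestamps, timedeltas) are
tracked in test_simple_select above.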