From d9d855bef35d047123169a5f7a34052f21739765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 15:58:57 +0200 Subject: [PATCH 1/9] Use pytest --- python/tests/generic.py | 54 ++++-- python/tests/test_df.py | 136 +++++++------- python/tests/test_sql.py | 385 ++++++++++++++------------------------ python/tests/test_udaf.py | 85 +++++---- 4 files changed, 288 insertions(+), 372 deletions(-) diff --git a/python/tests/generic.py b/python/tests/generic.py index 267d6f656ce0..06c646585c64 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,24 +16,31 @@ # under the License. import datetime -import numpy -import pyarrow +import os.path +import shutil + +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +import datafusion # used to write parquet files -import pyarrow.parquet def data(): - data = numpy.concatenate( - [numpy.random.normal(0, 0.01, size=50), numpy.random.normal(50, 0.01, size=50)] - ) - return pyarrow.array(data) + np.random.seed(1) + data = np.concatenate([ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50) + ]) + return pa.array(data) def data_with_nans(): - data = numpy.random.normal(0, 0.01, size=50) - mask = numpy.random.randint(0, 2, size=50) - data[mask == 0] = numpy.NaN + np.random.seed(0) + data = np.random.normal(0, 0.01, size=50) + mask = np.random.randint(0, 2, size=50) + data[mask == 0] = np.NaN return data @@ -43,8 +50,19 @@ def data_datetime(f): datetime.datetime.now() - datetime.timedelta(days=1), datetime.datetime.now() + datetime.timedelta(days=1), ] - return pyarrow.array( - data, type=pyarrow.timestamp(f), mask=numpy.array([False, True, False]) + return pa.array( + data, type=pa.timestamp(f), mask=np.array([False, True, False]) + ) + + +def data_date32(): + data = [ + datetime.date(2000, 1, 1), + datetime.date(1980, 1, 1), + datetime.date(2030, 1, 1), + ] + return pa.array( + data, type=pa.date32(), mask=np.array([False, True, False]) ) @@ -54,16 +72,16 @@ def data_timedelta(f): datetime.timedelta(days=1), datetime.timedelta(seconds=1), ] - return pyarrow.array( - data, type=pyarrow.duration(f), mask=numpy.array([False, True, False]) + return pa.array( + data, type=pa.duration(f), mask=np.array([False, True, False]) ) def data_binary_other(): - return numpy.array([1, 0, 0], dtype="u4") + return np.array([1, 0, 0], dtype="u4") def write_parquet(path, data): - table = pyarrow.Table.from_arrays([data], names=["a"]) - pyarrow.parquet.write_table(table, path) - return path + table = pa.Table.from_arrays([data], names=["a"]) + pq.write_table(table, path) + return str(path) diff --git a/python/tests/test_df.py b/python/tests/test_df.py index fdafdfa7f509..e79b91ec503a 100644 --- a/python/tests/test_df.py +++ b/python/tests/test_df.py @@ -15,100 +15,104 @@ # specific language governing permissions and limitations # under the License. 
-import unittest +import pytest import pyarrow as pa -import datafusion +from datafusion import ExecutionContext, functions as f -f = datafusion.functions +@pytest.fixture +def df(): + ctx = ExecutionContext() -class TestCase(unittest.TestCase): - def _prepare(self): - ctx = datafusion.ExecutionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) + return ctx.create_dataframe([[batch]]) - def test_select(self): - df = self._prepare() - df = df.select( - f.col("a") + f.col("b"), - f.col("a") - f.col("b"), - ) +def test_select(df): + df = df.select( + f.col("a") + f.col("b"), + f.col("a") - f.col("b"), + ) - # execute and collect the first (and only) batch - result = df.collect()[0] + # execute and collect the first (and only) batch + result = df.collect()[0] - self.assertEqual(result.column(0), pa.array([5, 7, 9])) - self.assertEqual(result.column(1), pa.array([-3, -3, -3])) + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) - def test_filter(self): - df = self._prepare() - df = df.select( +def test_filter(df): + df = df \ + .select( f.col("a") + f.col("b"), f.col("a") - f.col("b"), - ).filter(f.col("a") > f.lit(2)) + ) \ + .filter(f.col("a") > f.lit(2)) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([9]) + assert result.column(1) == pa.array([-3]) + - # execute and collect the first (and only) batch - result = df.collect()[0] +def test_sort(df): + df = df.sort([ + f.col("b").sort(ascending=False) + ]) - self.assertEqual(result.column(0), pa.array([9])) - self.assertEqual(result.column(1), pa.array([-3])) + table = pa.Table.from_batches(df.collect()) + expected = {'a': [3, 2, 1], 'b': [6, 5, 4]} - def test_sort(self): - df = self._prepare() - df = df.sort([f.col("b").sort(ascending=False)]) + assert table.to_pydict() == expected - table = pa.Table.from_batches(df.collect()) - expected = {"a": [3, 2, 1], "b": [6, 5, 4]} - self.assertEqual(table.to_pydict(), expected) - def test_limit(self): - df = self._prepare() +def test_limit(df): + df = df.limit(1) - df = df.limit(1) + # execute and collect the first (and only) batch + result = df.collect()[0] - # execute and collect the first (and only) batch - result = df.collect()[0] + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 - self.assertEqual(len(result.column(0)), 1) - self.assertEqual(len(result.column(1)), 1) - def test_udf(self): - df = self._prepare() +def test_udf(df): + # is_null is a pa function over arrays + udf = f.udf(lambda x: x.is_null(), [pa.int64()], pa.bool_()) - # is_null is a pa function over arrays - udf = f.udf(lambda x: x.is_null(), [pa.int64()], pa.bool_()) + df = df.select(udf(f.col("a"))) + result = df.collect()[0].column(0) - df = df.select(udf(f.col("a"))) + assert result == pa.array([False, False, False]) - self.assertEqual(df.collect()[0].column(0), pa.array([False, False, False])) - def test_join(self): - ctx = datafusion.ExecutionContext() +def test_join(): + ctx = ExecutionContext() - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) + batch = 
pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]]) + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]]) - df = df.join(df1, on="a", how="inner") - df = df.sort([f.col("a").sort(ascending=True)]) - table = pa.Table.from_batches(df.collect()) + df = df.join(df1, on="a", how="inner") + df = df.sort([ + f.col("a").sort(ascending=True) + ]) + table = pa.Table.from_batches(df.collect()) - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - self.assertEqual(table.to_pydict(), expected) + expected = {'a': [1, 2], 'c': [8, 10], 'b': [4, 5]} + assert table.to_pydict() == expected diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 117284973fb7..15c8e43c4bd3 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,286 +15,181 @@ # specific language governing permissions and limitations # under the License. -import unittest import tempfile import datetime import os.path import shutil -import numpy -import pyarrow -import datafusion +import pytest +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq -# used to write parquet files -import pyarrow.parquet +from datafusion import ExecutionContext +import datafusion from tests.generic import * -class TestCase(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - numpy.random.seed(1) - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def test_no_table(self): - with self.assertRaises(Exception): - datafusion.Context().sql("SELECT a FROM b").collect() - - def test_register(self): - ctx = datafusion.ExecutionContext() - - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data()) +@pytest.fixture +def ctx(): + return ExecutionContext() - ctx.register_parquet("t", path) - self.assertEqual(ctx.tables(), {"t"}) +def test_no_table(ctx): + #TODO(kszucs): should raise a DataFusionError instead of plain Exeption + with pytest.raises(Exception, match="DataFusion error"): + ctx.sql("SELECT a FROM b").collect() - def test_execute(self): - data = [1, 1, 2, 2, 3, 11, 12] - ctx = datafusion.ExecutionContext() +def test_register(ctx, tmp_path): + path = write_parquet(tmp_path / "a.parquet", data()) + ctx.register_parquet("t", path) - # single column, "a" - path = write_parquet( - os.path.join(self.test_dir, "a.parquet"), pyarrow.array(data) - ) - ctx.register_parquet("t", path) - - self.assertEqual(ctx.tables(), {"t"}) - - # count - result = ctx.sql("SELECT COUNT(a) FROM t").collect() + assert ctx.tables() == {"t"} - expected = pyarrow.array([7], pyarrow.uint64()) - expected = [pyarrow.RecordBatch.from_arrays([expected], ["COUNT(a)"])] - self.assertEqual(expected, result) - # where - expected = pyarrow.array([2], pyarrow.uint64()) - expected = [pyarrow.RecordBatch.from_arrays([expected], ["COUNT(a)"])] - self.assertEqual( - expected, ctx.sql("SELECT COUNT(a) FROM t WHERE a > 10").collect() - ) +def test_execute(ctx, tmp_path): + data = [1, 1, 2, 2, 3, 11, 12] - # group by - results = ctx.sql( - "SELECT CAST(a as int), COUNT(a) FROM t GROUP BY CAST(a as int)" - ).collect() - - # group by returns batches - result_keys = [] - result_values = [] - for result in results: - pydict = 
result.to_pydict() - result_keys.extend(pydict["CAST(a AS Int32)"]) - result_values.extend(pydict["COUNT(a)"]) - - result_keys, result_values = ( - list(t) for t in zip(*sorted(zip(result_keys, result_values))) - ) + # single column, "a" + path = write_parquet(tmp_path / "a.parquet", pa.array(data)) + ctx.register_parquet("t", path) - self.assertEqual(result_keys, [1, 2, 3, 11, 12]) - self.assertEqual(result_values, [2, 2, 1, 1, 1]) - - # order by - result = ctx.sql( - "SELECT a, CAST(a AS int) FROM t ORDER BY a DESC LIMIT 2" - ).collect() - expected_a = pyarrow.array([50.0219, 50.0152], pyarrow.float64()) - expected_cast = pyarrow.array([50, 50], pyarrow.int32()) - expected = [ - pyarrow.RecordBatch.from_arrays( - [expected_a, expected_cast], ["a", "CAST(a AS Int32)"] - ) - ] - numpy.testing.assert_equal(expected[0].column(1), expected[0].column(1)) - - def test_cast(self): - """ - Verify that we can cast - """ - ctx = datafusion.ExecutionContext() - - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data()) - ctx.register_parquet("t", path) - - valid_types = [ - "smallint", - "int", - "bigint", - "float(32)", - "float(64)", - "float", - ] - - select = ", ".join( - [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)] - ) + assert ctx.tables() == {"t"} - # can execute, which implies that we can cast - ctx.sql(f"SELECT {select} FROM t").collect() + # count + result = ctx.sql("SELECT COUNT(a) FROM t").collect() - def _test_udf(self, udf, args, return_type, array, expected): - ctx = datafusion.ExecutionContext() + expected = pa.array([7], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["COUNT(a)"])] + assert result == expected - # write to disk - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), array) - ctx.register_parquet("t", path) + # where + expected = pa.array([2], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["COUNT(a)"])] + result = ctx.sql("SELECT COUNT(a) FROM t WHERE a > 10").collect() + assert result == expected - ctx.register_udf("udf", udf, args, return_type) + # group by + results = ctx.sql( + "SELECT CAST(a as int), COUNT(a) FROM t GROUP BY CAST(a as int)" + ).collect() - batches = ctx.sql("SELECT udf(a) AS tt FROM t").collect() + # group by returns batches + result_keys = [] + result_values = [] + for result in results: + pydict = result.to_pydict() + result_keys.extend(pydict["CAST(a AS Int32)"]) + result_values.extend(pydict["COUNT(a)"]) - result = batches[0].column(0) + result_keys, result_values = ( + list(t) for t in zip(*sorted(zip(result_keys, result_values))) + ) - self.assertEqual(expected, result) + assert result_keys == [1, 2, 3, 11, 12] + assert result_values == [2, 2, 1, 1, 1] - def test_udf_identity(self): - self._test_udf( - lambda x: x, - [pyarrow.float64()], - pyarrow.float64(), - pyarrow.array([-1.2, None, 1.2]), - pyarrow.array([-1.2, None, 1.2]), + # order by + result = ctx.sql( + "SELECT a, CAST(a AS int) FROM t ORDER BY a DESC LIMIT 2" + ).collect() + expected_a = pa.array([50.0219, 50.0152], pa.float64()) + expected_cast = pa.array([50, 50], pa.int32()) + expected = [ + pa.RecordBatch.from_arrays( + [expected_a, expected_cast], ["a", "CAST(a AS Int32)"] ) - - def test_udf(self): - self._test_udf( + ] + np.testing.assert_equal(expected[0].column(1), expected[0].column(1)) + + +def test_cast(ctx, tmp_path): + """ + Verify that we can cast + """ + path = write_parquet(tmp_path / "a.parquet", data()) + ctx.register_parquet("t", path) + + valid_types = [ + "smallint", + "int", + 
"bigint", + "float(32)", + "float(64)", + "float", + ] + + select = ", ".join( + [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)] + ) + + # can execute, which implies that we can cast + ctx.sql(f"SELECT {select} FROM t").collect() + + +@pytest.mark.parametrize( + ("fn", "input_types", "output_type", "input_values", "expected_values"), + [ + ( + lambda x: x, + [pa.float64()], + pa.float64(), + [-1.2, None, 1.2], + [-1.2, None, 1.2] + ), + ( lambda x: x.is_null(), - [pyarrow.float64()], - pyarrow.bool_(), - pyarrow.array([-1.2, None, 1.2]), - pyarrow.array([False, True, False]), + [pa.float64()], + pa.bool_(), + [-1.2, None, 1.2], + [False, True, False] ) + ] +) +def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected_values): + # write to disk + path = write_parquet(tmp_path / "a.parquet", pa.array(input_values)) + ctx.register_parquet("t", path) + ctx.register_udf("udf", fn, input_types, output_type) + batches = ctx.sql("SELECT udf(a) AS tt FROM t").collect() + result = batches[0].column(0) -class TestIO(unittest.TestCase): - def setUp(self): - # Create a temporary directory - self.test_dir = tempfile.mkdtemp() - - def tearDown(self): - # Remove the directory after the test - shutil.rmtree(self.test_dir) - - def _test_data(self, data): - ctx = datafusion.ExecutionContext() - - # write to disk - path = write_parquet(os.path.join(self.test_dir, "a.parquet"), data) - ctx.register_parquet("t", path) + assert result == pa.array(expected_values) - batches = ctx.sql("SELECT a AS tt FROM t").collect() - result = batches[0].column(0) - - numpy.testing.assert_equal(data, result) - - def test_nans(self): - self._test_data(data_with_nans()) - - def test_utf8(self): - array = pyarrow.array( - ["a", "b", "c"], pyarrow.utf8(), numpy.array([False, True, False]) - ) - self._test_data(array) - - def test_large_utf8(self): - array = pyarrow.array( - ["a", "b", "c"], pyarrow.large_utf8(), numpy.array([False, True, False]) - ) - self._test_data(array) +_null_mask = np.array([False, True, False]) - # Error from Arrow - @unittest.expectedFailure - def test_datetime_s(self): - self._test_data(data_datetime("s")) +@pytest.mark.parametrize('arr', [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + data_binary_other(), + data_date32(), + data_with_nans(), # C data interface missing - @unittest.expectedFailure - def test_datetime_ms(self): - self._test_data(data_datetime("ms")) - - # C data interface missing - @unittest.expectedFailure - def test_datetime_us(self): - self._test_data(data_datetime("us")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_datetime_ns(self): - self._test_data(data_datetime("ns")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_s(self): - self._test_data(data_timedelta("s")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_ms(self): - self._test_data(data_timedelta("ms")) - + pytest.param(pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), marks=pytest.mark.xfail), + pytest.param(data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(data_datetime("ns"), 
marks=pytest.mark.xfail), # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_us(self): - self._test_data(data_timedelta("us")) - - # Not writtable to parquet - @unittest.expectedFailure - def test_timedelta_ns(self): - self._test_data(data_timedelta("ns")) - - def test_date32(self): - array = pyarrow.array( - [ - datetime.date(2000, 1, 1), - datetime.date(1980, 1, 1), - datetime.date(2030, 1, 1), - ], - pyarrow.date32(), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_binary_variable(self): - array = pyarrow.array( - [b"1", b"2", b"3"], pyarrow.binary(), numpy.array([False, True, False]) - ) - self._test_data(array) - - # C data interface missing - @unittest.expectedFailure - def test_binary_fixed(self): - array = pyarrow.array( - [b"1111", b"2222", b"3333"], - pyarrow.binary(4), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_large_binary(self): - array = pyarrow.array( - [b"1111", b"2222", b"3333"], - pyarrow.large_binary(), - numpy.array([False, True, False]), - ) - self._test_data(array) - - def test_binary_other(self): - self._test_data(data_binary_other()) - - def test_bool(self): - array = pyarrow.array( - [False, True, True], None, numpy.array([False, True, False]) - ) - self._test_data(array) - - def test_u32(self): - array = pyarrow.array([0, 1, 2], None, numpy.array([False, True, False])) - self._test_data(array) + pytest.param(data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(data_timedelta("ns"), marks=pytest.mark.xfail), +]) +def test_simple_select(ctx, tmp_path, arr): + path = write_parquet(tmp_path / "a.parquet", arr) + ctx.register_parquet("t", path) + + batches = ctx.sql("SELECT a AS tt FROM t").collect() + result = batches[0].column(0) + + np.testing.assert_equal(result, arr) diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index e1e4f933a9b4..2a69bcee4b87 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. -import unittest -import pyarrow -import pyarrow.compute -import datafusion +import pytest -f = datafusion.functions +import pyarrow as pa +import pyarrow.compute as pc + +from datafusion import ExecutionContext, functions as f class Accumulator: @@ -29,63 +29,62 @@ class Accumulator: """ def __init__(self): - self._sum = pyarrow.scalar(0.0) + self._sum = pa.scalar(0.0) - def to_scalars(self) -> [pyarrow.Scalar]: + def to_scalars(self) -> [pa.Scalar]: return [self._sum] - def update(self, values: pyarrow.Array) -> None: + def update(self, values: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(values).as_py() + self._sum = pa.scalar( + self._sum.as_py() + pc.sum(values).as_py() ) - def merge(self, states: pyarrow.Array) -> None: + def merge(self, states: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(states).as_py() + self._sum = pa.scalar( + self._sum.as_py() + pc.sum(states).as_py() ) - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: return self._sum -class TestCase(unittest.TestCase): - def _prepare(self): - ctx = datafusion.ExecutionContext() - # create a RecordBatch and a new DataFrame from it - batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 4, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) +@pytest.fixture +def df(): + ctx = ExecutionContext() - def test_aggregate(self): - df = self._prepare() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) - udaf = f.udaf( - Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()] - ) - df = df.aggregate([], [udaf(f.col("a"))]) +def test_aggregate(df): + udaf = f.udaf( + Accumulator, pa.float64(), pa.float64(), [pa.float64()] + ) - # execute and collect the first (and only) batch - result = df.collect()[0] + df = df.aggregate([], [udaf(f.col("a"))]) - self.assertEqual(result.column(0), pyarrow.array([1.0 + 2.0 + 3.0])) + # execute and collect the first (and only) batch + result = df.collect()[0] - def test_group_by(self): - df = self._prepare() + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) - udaf = f.udaf( - Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()] - ) - df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) +def test_group_by(df): + udaf = f.udaf( + Accumulator, pa.float64(), pa.float64(), [pa.float64()] + ) + + df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) + + batches = df.collect() + arrays = [batch.column(1) for batch in batches] + joined = pa.concat_arrays(arrays) + assert joined == pa.array([1.0 + 2.0, 3.0])) - # execute and collect the first (and only) batch - batches = df.collect() - arrays = [batch.column(1) for batch in batches] - joined = pyarrow.concat_arrays(arrays) - self.assertEqual(joined, pyarrow.array([1.0 + 2.0, 3.0])) From 68c456da4585b8a5b3f50698db8492f94c371078 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 16:03:58 +0200 Subject: [PATCH 2/9] Formatting --- python/tests/generic.py | 13 +++--- python/tests/test_df.py | 28 +++++-------- python/tests/test_sql.py | 88 ++++++++++++++++++++------------------- python/tests/test_udaf.py | 30 +++++-------- 4 files changed, 73 insertions(+), 86 deletions(-) diff --git a/python/tests/generic.py b/python/tests/generic.py index 06c646585c64..e61542e6ab37 100644 --- a/python/tests/generic.py +++ b/python/tests/generic.py @@ -16,23 +16,22 @@ # under the License. import datetime -import os.path -import shutil import numpy as np import pyarrow as pa import pyarrow.parquet as pq -import datafusion # used to write parquet files def data(): np.random.seed(1) - data = np.concatenate([ - np.random.normal(0, 0.01, size=50), - np.random.normal(50, 0.01, size=50) - ]) + data = np.concatenate( + [ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50), + ] + ) return pa.array(data) diff --git a/python/tests/test_df.py b/python/tests/test_df.py index e79b91ec503a..5b6cbddbd74b 100644 --- a/python/tests/test_df.py +++ b/python/tests/test_df.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. 
-import pytest - import pyarrow as pa -from datafusion import ExecutionContext, functions as f +import pytest +from datafusion import ExecutionContext +from datafusion import functions as f @pytest.fixture @@ -48,12 +48,10 @@ def test_select(df): def test_filter(df): - df = df \ - .select( - f.col("a") + f.col("b"), - f.col("a") - f.col("b"), - ) \ - .filter(f.col("a") > f.lit(2)) + df = df.select( + f.col("a") + f.col("b"), + f.col("a") - f.col("b"), + ).filter(f.col("a") > f.lit(2)) # execute and collect the first (and only) batch result = df.collect()[0] @@ -63,12 +61,10 @@ def test_filter(df): def test_sort(df): - df = df.sort([ - f.col("b").sort(ascending=False) - ]) + df = df.sort([f.col("b").sort(ascending=False)]) table = pa.Table.from_batches(df.collect()) - expected = {'a': [3, 2, 1], 'b': [6, 5, 4]} + expected = {"a": [3, 2, 1], "b": [6, 5, 4]} assert table.to_pydict() == expected @@ -109,10 +105,8 @@ def test_join(): df1 = ctx.create_dataframe([[batch]]) df = df.join(df1, on="a", how="inner") - df = df.sort([ - f.col("a").sort(ascending=True) - ]) + df = df.sort([f.col("a").sort(ascending=True)]) table = pa.Table.from_batches(df.collect()) - expected = {'a': [1, 2], 'c': [8, 10], 'b': [4, 5]} + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} assert table.to_pydict() == expected diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 15c8e43c4bd3..4eaf2b259d7c 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,20 +15,12 @@ # specific language governing permissions and limitations # under the License. -import tempfile -import datetime -import os.path -import shutil - -import pytest import numpy as np import pyarrow as pa -import pyarrow.parquet as pq - +import pytest from datafusion import ExecutionContext -import datafusion -from tests.generic import * +from . 
import generic as helpers @pytest.fixture @@ -37,13 +29,13 @@ def ctx(): def test_no_table(ctx): - #TODO(kszucs): should raise a DataFusionError instead of plain Exeption + # TODO(kszucs): should raise a DataFusionError instead of plain Exeption with pytest.raises(Exception, match="DataFusion error"): ctx.sql("SELECT a FROM b").collect() def test_register(ctx, tmp_path): - path = write_parquet(tmp_path / "a.parquet", data()) + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) assert ctx.tables() == {"t"} @@ -53,7 +45,7 @@ def test_execute(ctx, tmp_path): data = [1, 1, 2, 2, 3, 11, 12] # single column, "a" - path = write_parquet(tmp_path / "a.parquet", pa.array(data)) + path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) ctx.register_parquet("t", path) assert ctx.tables() == {"t"} @@ -109,7 +101,7 @@ def test_cast(ctx, tmp_path): """ Verify that we can cast """ - path = write_parquet(tmp_path / "a.parquet", data()) + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) ctx.register_parquet("t", path) valid_types = [ @@ -137,20 +129,24 @@ def test_cast(ctx, tmp_path): [pa.float64()], pa.float64(), [-1.2, None, 1.2], - [-1.2, None, 1.2] + [-1.2, None, 1.2], ), ( lambda x: x.is_null(), [pa.float64()], pa.bool_(), [-1.2, None, 1.2], - [False, True, False] - ) - ] + [False, True, False], + ), + ], ) -def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected_values): +def test_udf( + ctx, tmp_path, fn, input_types, output_type, input_values, expected_values +): # write to disk - path = write_parquet(tmp_path / "a.parquet", pa.array(input_values)) + path = helpers.write_parquet( + tmp_path / "a.parquet", pa.array(input_values) + ) ctx.register_parquet("t", path) ctx.register_udf("udf", fn, input_types, output_type) @@ -163,30 +159,36 @@ def test_udf(ctx, tmp_path, fn, input_types, output_type, input_values, expected _null_mask = np.array([False, True, False]) -@pytest.mark.parametrize('arr', [ - pa.array(["a", "b", "c"], pa.utf8(), _null_mask), - pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), - pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), - pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), - pa.array([False, True, True], None, _null_mask), - pa.array([0, 1, 2], None), - data_binary_other(), - data_date32(), - data_with_nans(), - # C data interface missing - pytest.param(pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), marks=pytest.mark.xfail), - pytest.param(data_datetime("s"), marks=pytest.mark.xfail), - pytest.param(data_datetime("ms"), marks=pytest.mark.xfail), - pytest.param(data_datetime("us"), marks=pytest.mark.xfail), - pytest.param(data_datetime("ns"), marks=pytest.mark.xfail), - # Not writtable to parquet - pytest.param(data_timedelta("s"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("ms"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("us"), marks=pytest.mark.xfail), - pytest.param(data_timedelta("ns"), marks=pytest.mark.xfail), -]) +@pytest.mark.parametrize( + "arr", + [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + helpers.data_binary_other(), + helpers.data_date32(), + helpers.data_with_nans(), + # C data interface missing + 
pytest.param( + pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), + marks=pytest.mark.xfail, + ), + pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), + # Not writtable to parquet + pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), + ], +) def test_simple_select(ctx, tmp_path, arr): - path = write_parquet(tmp_path / "a.parquet", arr) + path = helpers.write_parquet(tmp_path / "a.parquet", arr) ctx.register_parquet("t", path) batches = ctx.sql("SELECT a AS tt FROM t").collect() diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 2a69bcee4b87..98ef95e12ace 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,12 +15,11 @@ # specific language governing permissions and limitations # under the License. -import pytest - import pyarrow as pa import pyarrow.compute as pc - -from datafusion import ExecutionContext, functions as f +import pytest +from datafusion import ExecutionContext +from datafusion import functions as f class Accumulator: @@ -35,22 +34,19 @@ def to_scalars(self) -> [pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar( - self._sum.as_py() + pc.sum(values).as_py() - ) + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) def merge(self, states: pa.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pa.scalar( - self._sum.as_py() + pc.sum(states).as_py() - ) + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py()) def evaluate(self) -> pa.Scalar: return self._sum - @pytest.fixture def df(): ctx = ExecutionContext() @@ -64,9 +60,7 @@ def df(): def test_aggregate(df): - udaf = f.udaf( - Accumulator, pa.float64(), pa.float64(), [pa.float64()] - ) + udaf = f.udaf(Accumulator, pa.float64(), pa.float64(), [pa.float64()]) df = df.aggregate([], [udaf(f.col("a"))]) @@ -77,9 +71,7 @@ def test_aggregate(df): def test_group_by(df): - udaf = f.udaf( - Accumulator, pa.float64(), pa.float64(), [pa.float64()] - ) + udaf = f.udaf(Accumulator, pa.float64(), pa.float64(), [pa.float64()]) df = df.aggregate([f.col("b")], [udaf(f.col("a"))]) From 4a4dd9edc8bf63bb0824063d47d04dfc4ebbf001 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 16:09:04 +0200 Subject: [PATCH 3/9] Update GHA conf --- .github/workflows/python_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index 13516ff699da..f072363d0a8f 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,10 +50,10 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements.txt + pip install -r requirements.txt pytest maturin develop - python -m unittest discover tests + pytest -v . 
env: CARGO_HOME: "/home/runner/.cargo" CARGO_TARGET_DIR: "/home/runner/target" From 2f9df4e9d6a7c82161fe8ba7a53044a33eee9a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 3 Jun 2021 17:07:52 +0200 Subject: [PATCH 4/9] Remove TODO note --- python/tests/test_sql.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 4eaf2b259d7c..361526d06970 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -29,7 +29,6 @@ def ctx(): def test_no_table(ctx): - # TODO(kszucs): should raise a DataFusionError instead of plain Exeption with pytest.raises(Exception, match="DataFusion error"): ctx.sql("SELECT a FROM b").collect() From f5b44cab0606b11747e3a975a33ea94dbdcd3c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 10:34:02 +0200 Subject: [PATCH 5/9] Format --- python/tests/test_udaf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 98ef95e12ace..b24c08dbc867 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -78,5 +78,4 @@ def test_group_by(df): batches = df.collect() arrays = [batch.column(1) for batch in batches] joined = pa.concat_arrays(arrays) - assert joined == pa.array([1.0 + 2.0, 3.0])) - + assert joined == pa.array([1.0 + 2.0, 3.0]) From e33212fd276344f66beb6ff2012d8564675eb330 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 10:55:44 +0200 Subject: [PATCH 6/9] Test requirements file --- dev/release/rat_exclude_files.txt | 1 + python/requirements-test.in | 18 +++++ python/requirements-test.txt | 107 ++++++++++++++++++++++++++++++ python/requirements.txt | 16 ----- 4 files changed, 126 insertions(+), 16 deletions(-) create mode 100644 python/requirements-test.in create mode 100644 python/requirements-test.txt diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 6126699bbc1f..96beccd0af81 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -105,3 +105,4 @@ benchmarks/queries/q*.sql ballista/rust/scheduler/testdata/* ballista/ui/scheduler/yarn.lock python/rust-toolchain +python/requirements*.txt diff --git a/python/requirements-test.in b/python/requirements-test.in new file mode 100644 index 000000000000..95b4932fe20b --- /dev/null +++ b/python/requirements-test.in @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+-r requirements.in +pytest diff --git a/python/requirements-test.txt b/python/requirements-test.txt new file mode 100644 index 000000000000..b5dfb8a8c3c3 --- /dev/null +++ b/python/requirements-test.txt @@ -0,0 +1,107 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes requirements-test.in +# +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +maturin==0.10.6 \ + --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ + --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ + --hash=sha256:4177a223727a0ad57bc3f69ca4c3bc04bb3cc4da787cc59a8e25808c85685c67 \ + --hash=sha256:4eb4481b6c7d6cac043b969d2eb993c982523e91bb2709f0b09e231cf4846731 \ + --hash=sha256:532625f312185b06ec196fdb0fc79efafc0e98768153d226fb9417c0ca85e410 \ + --hash=sha256:53ef64a147f8a5241a3e932f2db22b5ae7dc5892dae994da319446c5db89dc94 \ + --hash=sha256:a04589da42f62b1d515f35c81274a56fe0d29216894525e8a37fd1e3c69d87b1 \ + --hash=sha256:b58e9e2ba5a3f651d8885c41370a00bb1d3e4d7313cbb63354077153be7650f4 \ + --hash=sha256:bd39f7e08eb9908d4fe1cd9b3c953fad5b1fb4fec9c82d14c2973a65751e1899 \ + --hash=sha256:d63f2a15f0b8db4e70d9a59766ca240b2c2ee2146ed5e4385a6118d941d68b25 \ + --hash=sha256:fa7e1cea2a768257a33aeb556fdec5fc36011bfe82d96730117433c635629dd8 + # via -r requirements.in +numpy==1.20.3 \ + --hash=sha256:1676b0a292dd3c99e49305a16d7a9f42a4ab60ec522eac0d3dd20cdf362ac010 \ + --hash=sha256:16f221035e8bd19b9dc9a57159e38d2dd060b48e93e1d843c49cb370b0f415fd \ + --hash=sha256:43909c8bb289c382170e0282158a38cf306a8ad2ff6dfadc447e90f9961bef43 \ + --hash=sha256:4e465afc3b96dbc80cf4a5273e5e2b1e3451286361b4af70ce1adb2984d392f9 \ + --hash=sha256:55b745fca0a5ab738647d0e4db099bd0a23279c32b31a783ad2ccea729e632df \ + --hash=sha256:5d050e1e4bc9ddb8656d7b4f414557720ddcca23a5b88dd7cff65e847864c400 \ + --hash=sha256:637d827248f447e63585ca3f4a7d2dfaa882e094df6cfa177cc9cf9cd6cdf6d2 \ + --hash=sha256:6690080810f77485667bfbff4f69d717c3be25e5b11bb2073e76bb3f578d99b4 \ + --hash=sha256:66fbc6fed94a13b9801fb70b96ff30605ab0a123e775a5e7a26938b717c5d71a \ + --hash=sha256:67d44acb72c31a97a3d5d33d103ab06d8ac20770e1c5ad81bdb3f0c086a56cf6 \ + --hash=sha256:6ca2b85a5997dabc38301a22ee43c82adcb53ff660b89ee88dded6b33687e1d8 \ + --hash=sha256:6e51534e78d14b4a009a062641f465cfaba4fdcb046c3ac0b1f61dd97c861b1b \ + --hash=sha256:70eb5808127284c4e5c9e836208e09d685a7978b6a216db85960b1a112eeace8 \ + --hash=sha256:830b044f4e64a76ba71448fce6e604c0fc47a0e54d8f6467be23749ac2cbd2fb \ + --hash=sha256:8b7bb4b9280da3b2856cb1fc425932f46fba609819ee1c62256f61799e6a51d2 \ + --hash=sha256:a9c65473ebc342715cb2d7926ff1e202c26376c0dcaaee85a1fd4b8d8c1d3b2f \ + --hash=sha256:c1c09247ccea742525bdb5f4b5ceeacb34f95731647fe55774aa36557dbb5fa4 \ + --hash=sha256:c5bf0e132acf7557fc9bb8ded8b53bbbbea8892f3c9a1738205878ca9434206a \ + --hash=sha256:db250fd3e90117e0312b611574cd1b3f78bec046783195075cbd7ba9c3d73f16 \ + --hash=sha256:e515c9a93aebe27166ec9593411c58494fa98e5fcc219e47260d9ab8a1cc7f9f \ + --hash=sha256:e55185e51b18d788e49fe8305fd73ef4470596b33fc2c1ceb304566b99c71a69 \ + --hash=sha256:ea9cff01e75a956dbee133fa8e5b68f2f92175233de2f88de3a682dd94deda65 \ + 
--hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ + --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 + # via pyarrow +packaging==20.9 \ + --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ + --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a + # via pytest +pluggy==0.13.1 \ + --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ + --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest +pyarrow==4.0.1 \ + --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ + --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ + --hash=sha256:11517f0b4f4acbab0c37c674b4d1aad3c3dfea0f6b1bb322e921555258101ab3 \ + --hash=sha256:150db335143edd00d3ec669c7c8167d401c4aa0a290749351c80bbf146892b2e \ + --hash=sha256:24040a20208e9b16ba7b284624ebfe67e40f5c40b5dc8d874da322ac0053f9d3 \ + --hash=sha256:33c457728a1ce825b80aa8c8ed573709f1efe72003d45fa6fdbb444de9cc0b74 \ + --hash=sha256:423cd6a14810f4e40cb76e13d4240040fc1594d69fe1c4f2c70be00ad512ade5 \ + --hash=sha256:5387db80c6a7b5598884bf4df3fc546b3373771ad614548b782e840b71704877 \ + --hash=sha256:5a76ec44af838862b23fb5cfc48765bc7978f7b58a181c96ad92856280de548b \ + --hash=sha256:5f2660f59dfcfd34adac7c08dc7f615920de703f191066ed6277628975f06878 \ + --hash=sha256:6b7bd8f5aa327cc32a1b9b02a76502851575f5edb110f93c59a45c70211a5618 \ + --hash=sha256:72cf3477538bd8504f14d6299a387cc335444f7a188f548096dfea9533551f02 \ + --hash=sha256:76b75a9cfc572e890a1e000fd532bdd2084ec3f1ee94ee51802a477913a21072 \ + --hash=sha256:a81adbfbe2f6528d4593b5a8962b2751838517401d14e9d4cab6787478802693 \ + --hash=sha256:a968375c66e505f72b421f5864a37f51aad5da61b6396fa283f956e9f2b2b923 \ + --hash=sha256:afd4f7c0a225a326d2c0039cdc8631b5e8be30f78f6b7a3e5ce741cf5dd81c72 \ + --hash=sha256:b05bdd513f045d43228247ef4d9269c88139788e2d566f4cb3e855e282ad0330 \ + --hash=sha256:c2733c9bcd00074ce5497dd0a7b8a10c91d3395ddce322d7021c7fdc4ea6f610 \ + --hash=sha256:d0f080b2d9720bec42624cb0df66f60ae66b84a2ccd1fe2c291322df915ac9db \ + --hash=sha256:dcd20ee0240a88772eeb5691102c276f5cdec79527fb3a0679af7f93f93cb4bd \ + --hash=sha256:e1351576877764fb4d5690e4721ce902e987c85f4ab081c70a34e1d24646586e \ + --hash=sha256:e44dfd7e61c9eb6dda59bc49ad69e77945f6d049185a517c130417e3ca0494d8 \ + --hash=sha256:ee3d87615876550fee9a523307dd4b00f0f44cf47a94a32a07793da307df31a0 \ + --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ + --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 + # via -r requirements.in +pyparsing==2.4.7 \ + --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ + --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b + # via packaging +pytest==6.2.4 \ + --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ + --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 + # via -r requirements-test.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via + # -r requirements.in + # maturin + # pytest diff --git 
a/python/requirements.txt b/python/requirements.txt index ff02b80cf6fc..635eb2278482 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,19 +1,3 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. # # This file is autogenerated by pip-compile # To update, run: From 8459db30903d4477ce348daa4e5ecb59c908a6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 11:05:46 +0200 Subject: [PATCH 7/9] Update workflow file --- .github/workflows/python_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index f072363d0a8f..41842bbfef93 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,7 +50,7 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements.txt pytest + pip install -r requirements-test.txt maturin develop pytest -v . From 1bb075c745cd844d02c7c5dbd7f2cdd1a7b5c544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 12:37:21 +0200 Subject: [PATCH 8/9] Merge requirements file --- python/requirements-test.in | 18 ------ python/requirements-test.txt | 107 ----------------------------------- python/requirements.in | 1 + python/requirements.txt | 31 +++++++++- 4 files changed, 31 insertions(+), 126 deletions(-) delete mode 100644 python/requirements-test.in delete mode 100644 python/requirements-test.txt diff --git a/python/requirements-test.in b/python/requirements-test.in deleted file mode 100644 index 95b4932fe20b..000000000000 --- a/python/requirements-test.in +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
--r requirements.in -pytest diff --git a/python/requirements-test.txt b/python/requirements-test.txt deleted file mode 100644 index b5dfb8a8c3c3..000000000000 --- a/python/requirements-test.txt +++ /dev/null @@ -1,107 +0,0 @@ -# -# This file is autogenerated by pip-compile -# To update, run: -# -# pip-compile --generate-hashes requirements-test.in -# -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb - # via pytest -iniconfig==1.1.1 \ - --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ - --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 - # via pytest -maturin==0.10.6 \ - --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ - --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ - --hash=sha256:4177a223727a0ad57bc3f69ca4c3bc04bb3cc4da787cc59a8e25808c85685c67 \ - --hash=sha256:4eb4481b6c7d6cac043b969d2eb993c982523e91bb2709f0b09e231cf4846731 \ - --hash=sha256:532625f312185b06ec196fdb0fc79efafc0e98768153d226fb9417c0ca85e410 \ - --hash=sha256:53ef64a147f8a5241a3e932f2db22b5ae7dc5892dae994da319446c5db89dc94 \ - --hash=sha256:a04589da42f62b1d515f35c81274a56fe0d29216894525e8a37fd1e3c69d87b1 \ - --hash=sha256:b58e9e2ba5a3f651d8885c41370a00bb1d3e4d7313cbb63354077153be7650f4 \ - --hash=sha256:bd39f7e08eb9908d4fe1cd9b3c953fad5b1fb4fec9c82d14c2973a65751e1899 \ - --hash=sha256:d63f2a15f0b8db4e70d9a59766ca240b2c2ee2146ed5e4385a6118d941d68b25 \ - --hash=sha256:fa7e1cea2a768257a33aeb556fdec5fc36011bfe82d96730117433c635629dd8 - # via -r requirements.in -numpy==1.20.3 \ - --hash=sha256:1676b0a292dd3c99e49305a16d7a9f42a4ab60ec522eac0d3dd20cdf362ac010 \ - --hash=sha256:16f221035e8bd19b9dc9a57159e38d2dd060b48e93e1d843c49cb370b0f415fd \ - --hash=sha256:43909c8bb289c382170e0282158a38cf306a8ad2ff6dfadc447e90f9961bef43 \ - --hash=sha256:4e465afc3b96dbc80cf4a5273e5e2b1e3451286361b4af70ce1adb2984d392f9 \ - --hash=sha256:55b745fca0a5ab738647d0e4db099bd0a23279c32b31a783ad2ccea729e632df \ - --hash=sha256:5d050e1e4bc9ddb8656d7b4f414557720ddcca23a5b88dd7cff65e847864c400 \ - --hash=sha256:637d827248f447e63585ca3f4a7d2dfaa882e094df6cfa177cc9cf9cd6cdf6d2 \ - --hash=sha256:6690080810f77485667bfbff4f69d717c3be25e5b11bb2073e76bb3f578d99b4 \ - --hash=sha256:66fbc6fed94a13b9801fb70b96ff30605ab0a123e775a5e7a26938b717c5d71a \ - --hash=sha256:67d44acb72c31a97a3d5d33d103ab06d8ac20770e1c5ad81bdb3f0c086a56cf6 \ - --hash=sha256:6ca2b85a5997dabc38301a22ee43c82adcb53ff660b89ee88dded6b33687e1d8 \ - --hash=sha256:6e51534e78d14b4a009a062641f465cfaba4fdcb046c3ac0b1f61dd97c861b1b \ - --hash=sha256:70eb5808127284c4e5c9e836208e09d685a7978b6a216db85960b1a112eeace8 \ - --hash=sha256:830b044f4e64a76ba71448fce6e604c0fc47a0e54d8f6467be23749ac2cbd2fb \ - --hash=sha256:8b7bb4b9280da3b2856cb1fc425932f46fba609819ee1c62256f61799e6a51d2 \ - --hash=sha256:a9c65473ebc342715cb2d7926ff1e202c26376c0dcaaee85a1fd4b8d8c1d3b2f \ - --hash=sha256:c1c09247ccea742525bdb5f4b5ceeacb34f95731647fe55774aa36557dbb5fa4 \ - --hash=sha256:c5bf0e132acf7557fc9bb8ded8b53bbbbea8892f3c9a1738205878ca9434206a \ - --hash=sha256:db250fd3e90117e0312b611574cd1b3f78bec046783195075cbd7ba9c3d73f16 \ - --hash=sha256:e515c9a93aebe27166ec9593411c58494fa98e5fcc219e47260d9ab8a1cc7f9f \ - --hash=sha256:e55185e51b18d788e49fe8305fd73ef4470596b33fc2c1ceb304566b99c71a69 \ - --hash=sha256:ea9cff01e75a956dbee133fa8e5b68f2f92175233de2f88de3a682dd94deda65 \ - 
--hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ - --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 - # via pyarrow -packaging==20.9 \ - --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ - --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a - # via pytest -pluggy==0.13.1 \ - --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ - --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d - # via pytest -py==1.10.0 \ - --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ - --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a - # via pytest -pyarrow==4.0.1 \ - --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ - --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ - --hash=sha256:11517f0b4f4acbab0c37c674b4d1aad3c3dfea0f6b1bb322e921555258101ab3 \ - --hash=sha256:150db335143edd00d3ec669c7c8167d401c4aa0a290749351c80bbf146892b2e \ - --hash=sha256:24040a20208e9b16ba7b284624ebfe67e40f5c40b5dc8d874da322ac0053f9d3 \ - --hash=sha256:33c457728a1ce825b80aa8c8ed573709f1efe72003d45fa6fdbb444de9cc0b74 \ - --hash=sha256:423cd6a14810f4e40cb76e13d4240040fc1594d69fe1c4f2c70be00ad512ade5 \ - --hash=sha256:5387db80c6a7b5598884bf4df3fc546b3373771ad614548b782e840b71704877 \ - --hash=sha256:5a76ec44af838862b23fb5cfc48765bc7978f7b58a181c96ad92856280de548b \ - --hash=sha256:5f2660f59dfcfd34adac7c08dc7f615920de703f191066ed6277628975f06878 \ - --hash=sha256:6b7bd8f5aa327cc32a1b9b02a76502851575f5edb110f93c59a45c70211a5618 \ - --hash=sha256:72cf3477538bd8504f14d6299a387cc335444f7a188f548096dfea9533551f02 \ - --hash=sha256:76b75a9cfc572e890a1e000fd532bdd2084ec3f1ee94ee51802a477913a21072 \ - --hash=sha256:a81adbfbe2f6528d4593b5a8962b2751838517401d14e9d4cab6787478802693 \ - --hash=sha256:a968375c66e505f72b421f5864a37f51aad5da61b6396fa283f956e9f2b2b923 \ - --hash=sha256:afd4f7c0a225a326d2c0039cdc8631b5e8be30f78f6b7a3e5ce741cf5dd81c72 \ - --hash=sha256:b05bdd513f045d43228247ef4d9269c88139788e2d566f4cb3e855e282ad0330 \ - --hash=sha256:c2733c9bcd00074ce5497dd0a7b8a10c91d3395ddce322d7021c7fdc4ea6f610 \ - --hash=sha256:d0f080b2d9720bec42624cb0df66f60ae66b84a2ccd1fe2c291322df915ac9db \ - --hash=sha256:dcd20ee0240a88772eeb5691102c276f5cdec79527fb3a0679af7f93f93cb4bd \ - --hash=sha256:e1351576877764fb4d5690e4721ce902e987c85f4ab081c70a34e1d24646586e \ - --hash=sha256:e44dfd7e61c9eb6dda59bc49ad69e77945f6d049185a517c130417e3ca0494d8 \ - --hash=sha256:ee3d87615876550fee9a523307dd4b00f0f44cf47a94a32a07793da307df31a0 \ - --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ - --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 - # via -r requirements.in -pyparsing==2.4.7 \ - --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ - --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b - # via packaging -pytest==6.2.4 \ - --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ - --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 - # via -r requirements-test.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via - # -r requirements.in - # maturin - # pytest diff --git 
a/python/requirements.in b/python/requirements.in index 3ef9f18966d4..4ff7f4ee618b 100644 --- a/python/requirements.in +++ b/python/requirements.in @@ -17,3 +17,4 @@ maturin toml pyarrow +pytest diff --git a/python/requirements.txt b/python/requirements.txt index 635eb2278482..f7ede1ebd58e 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -2,8 +2,16 @@ # This file is autogenerated by pip-compile # To update, run: # -# pip-compile --generate-hashes +# pip-compile --generate-hashes requirements.in # +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest maturin==0.10.6 \ --hash=sha256:0e81496f70a4805e6ea7dda7b0425246c111ccb119a2e22c64abeff131f4dd21 \ --hash=sha256:3b5d5429bc05a816824420d99973f0cab39d8e274f6c3647bfd9afd95a030304 \ @@ -43,6 +51,18 @@ numpy==1.20.3 \ --hash=sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17 \ --hash=sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48 # via pyarrow +packaging==20.9 \ + --hash=sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5 \ + --hash=sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a + # via pytest +pluggy==0.13.1 \ + --hash=sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0 \ + --hash=sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest pyarrow==4.0.1 \ --hash=sha256:04be0f7cb9090bd029b5b53bed628548fef569e5d0b5c6cd7f6d0106dbbc782d \ --hash=sha256:0fde9c7a3d5d37f3fe5d18c4ed015e8f585b68b26d72a10d7012cad61afe43ff \ @@ -70,9 +90,18 @@ pyarrow==4.0.1 \ --hash=sha256:fa7b165cfa97158c1e6d15c68428317b4f4ae786d1dc2dbab43f1328c1eb43aa \ --hash=sha256:fe976695318560a97c6d31bba828eeca28c44c6f6401005e54ba476a28ac0a10 # via -r requirements.in +pyparsing==2.4.7 \ + --hash=sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1 \ + --hash=sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b + # via packaging +pytest==6.2.4 \ + --hash=sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b \ + --hash=sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890 + # via -r requirements.in toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f # via # -r requirements.in # maturin + # pytest From 00ce946961832f5ba1d83e066f5d5c7461d82505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 7 Jun 2021 13:14:51 +0200 Subject: [PATCH 9/9] Update workflow file --- .github/workflows/python_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml index 41842bbfef93..e689396b5dcd 100644 --- a/.github/workflows/python_test.yaml +++ b/.github/workflows/python_test.yaml @@ -50,7 +50,7 @@ jobs: python -m venv venv source venv/bin/activate - pip install -r requirements-test.txt + pip install -r 
requirements.txt maturin develop pytest -v .
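
A closing note for reviewers: the conversion above rests on three pytest
features that replace the unittest scaffolding. Fixtures stand in for
setUp()/self._prepare(), @pytest.mark.parametrize with
pytest.param(..., marks=pytest.mark.xfail) stands in for one test method
per input type decorated with @unittest.expectedFailure, and the built-in
tmp_path fixture stands in for the tempfile.mkdtemp()/shutil.rmtree()
pair. For anyone who has not used these, below is a minimal
self-contained sketch of the same patterns; every name in it (numbers,
test_square, and so on) is illustrative and appears nowhere in the
patches.

    import pytest


    @pytest.fixture
    def numbers():
        # Plays the role of setUp()/self._prepare(): every test that
        # names this fixture as an argument receives a fresh copy.
        return [1, 2, 3]


    def test_sum(numbers):
        assert sum(numbers) == 6


    # One parametrized test replaces a family of near-identical test
    # methods; known failures stay visible in the list via xfail marks
    # instead of being deleted.
    @pytest.mark.parametrize(
        ("value", "squared"),
        [
            (2, 4),
            (3, 9),
            pytest.param(3, 10, marks=pytest.mark.xfail),
        ],
    )
    def test_square(value, squared):
        assert value**2 == squared


    def test_write(tmp_path):
        # tmp_path replaces the tempfile.mkdtemp()/shutil.rmtree()
        # pair: pytest creates the directory and cleans it up itself.
        path = tmp_path / "out.txt"
        path.write_text("hello")
        assert path.read_text() == "hello"

Running `pytest -v .` as the updated workflow does reports the marked
case as XFAIL rather than failing the run, which is how the known
type-support gaps (fixed-size binary, timestamps, timedeltas) are
tracked in test_simple_select above.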