Skip to content

Commit

Permalink
add binary type support
Browse files Browse the repository at this point in the history
  • Loading branch information
Han Wang committed Jun 16, 2020
1 parent bf00c31 commit 12ad4ed
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 3 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ pip install triad

## Release History

### 0.3.7
* Add pyarrow binary type support

### 0.3.6
* Add `transform` to Schema class

Expand Down
4 changes: 3 additions & 1 deletion tests/collections/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_schema_setter():
with raises(SchemaError):
a["x"] = pa.field("y", pa.int32()) # key!=field.name
with raises(SchemaError):
a["y"] = pa.binary() # unsupported types
a["y"] = pa.large_binary() # unsupported types
a["c"] = str
a["d"] = pa.field("d", pa.int32())
assert a == "a:int,b:str,c:str,d:int"
Expand Down Expand Up @@ -280,6 +280,8 @@ def test_schema_rename():

def test_schema_transform():
s = Schema("a:int,b:str,c:bool")
assert s.transform() == Schema()
assert s.transform(None) == Schema()
assert s.transform("x:str") == "x:str"
assert s.transform("*") == s
assert s.transform("*~x,y") == s
Expand Down
10 changes: 10 additions & 0 deletions tests/utils/test_pandas_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pytest import raises
from triad.utils.pandas_like import PD_UTILS
from triad.utils.pyarrow import expression_to_schema
import pickle


def test_to_schema():
Expand Down Expand Up @@ -105,6 +106,15 @@ def test_nested():
assert [[[1, 2]]] == a


def test_binary():
    """Binary cells placed in a ``DF`` come back unchanged from ``as_array``."""
    pickled = pickle.dumps("xyz")
    raw = b"xy"
    rows = [[pickled, raw]]
    # The parsed schema itself is not used below; the call is kept so the
    # expression is still exercised exactly as before.
    expression_to_schema("a:bytes,b:bytes")
    frame = DF(rows, "a:bytes,b:bytes")
    result = frame.as_array(type_safe=True)
    assert [[pickled, raw]] == result


def test_nan_none():
df = DF([[None, None]], "b:str,c:double", True)
assert df.native.iloc[0, 0] is None
Expand Down
13 changes: 12 additions & 1 deletion tests/utils/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def test_expression_conversion():
"a:[{x:int,y:[str]}],b:[ubyte]",
)
_assert_from_expr("a:decimal(5,2)")
_assert_from_expr("a:bytes,b:bytes")
_assert_from_expr("a:bytes,b: binary", "a:bytes,b:bytes")

raises(SyntaxError, lambda: expression_to_schema("123:int"))
raises(SyntaxError, lambda: expression_to_schema("int"))
Expand Down Expand Up @@ -75,6 +77,11 @@ def test__type_to_expression():
assert "timestamp(s)" == _type_to_expression(pa.timestamp("s"))
assert "decimal(5)" == _type_to_expression(pa.decimal128(5))
assert "decimal(5,2)" == _type_to_expression(pa.decimal128(5, 2))
assert "bytes" == _type_to_expression(pa.binary())
assert "bytes" == _type_to_expression(pa.binary(-1))
raises(NotImplementedError, lambda: _type_to_expression(pa.binary(0)))
raises(NotImplementedError, lambda: _type_to_expression(pa.binary(-2)))
raises(NotImplementedError, lambda: _type_to_expression(pa.binary(1)))
raises(NotImplementedError, lambda: _type_to_expression(pa.large_binary()))


Expand All @@ -87,6 +94,8 @@ def test_to_pa_datatype():
assert TRIAD_DEFAULT_TIMESTAMP == to_pa_datatype(datetime)
assert pa.date32() == to_pa_datatype(date)
assert pa.date32() == to_pa_datatype("date")
assert pa.binary() == to_pa_datatype("bytes")
assert pa.binary() == to_pa_datatype("binary")
raises(TypeError, lambda: to_pa_datatype(123))
raises(TypeError, lambda: to_pa_datatype(None))

Expand All @@ -97,7 +106,9 @@ def test_is_supported():
assert is_supported(pa.timestamp("s"))
assert is_supported(pa.date32())
assert not is_supported(pa.date64())
assert not is_supported(pa.binary())
assert is_supported(pa.binary())
assert not is_supported(pa.binary(0))
assert not is_supported(pa.binary(1))
assert is_supported(pa.struct([pa.field("a", pa.int32())]))
assert is_supported(pa.list_(pa.int32()))
raises(NotImplementedError, lambda: is_supported(pa.date64(), throw=True))
Expand Down
15 changes: 15 additions & 0 deletions tests/utils/test_pyarrow_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from triad.constants import FLOAT_INF, FLOAT_NAN, FLOAT_NINF
from triad.exceptions import InvalidOperationError, NoneArgumentError
from triad.utils.pyarrow import _to_pydate, _to_pydatetime, apply_schema
import pickle

"""
None,"1",1,1.1,"2020-01-01","2020-01-01 01:02:03",
Expand Down Expand Up @@ -107,6 +108,20 @@ def test_convert_to_bool():
_test_convert(np.nan, "bool", None)


def test_convert_to_binary():
    """Check how assorted values are converted for the ``bytes`` type."""
    stamp = pd.Timestamp("2020-01-01T02:03:04")
    payload = b'\x0e\x15'

    # None and bytes-like inputs: the expected result is None / the raw bytes.
    _test_convert(None, "bytes", None)
    _test_convert(payload, "bytes", payload)
    _test_convert(bytearray(payload), "bytes", payload)

    # Every other value is expected to come back as its pickled form.
    for value in (False, "true", pd.NaT, stamp, FLOAT_NAN, np.nan):
        _test_convert(value, "bytes", pickle.dumps(value))


def test_convert_to_datetime():
pdt = pd.Timestamp("2020-01-01T02:03:04")
dt = datetime(2020, 1, 1, 2, 3, 4)
Expand Down
2 changes: 1 addition & 1 deletion triad/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.6"
__version__ = "0.3.7"
13 changes: 13 additions & 0 deletions triad/utils/pyarrow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import pickle
from datetime import date, datetime
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple

Expand Down Expand Up @@ -42,6 +43,8 @@
"float64": pa.float64(),
"date": pa.date32(),
"datetime": TRIAD_DEFAULT_TIMESTAMP,
"binary": pa.binary(),
"bytes": pa.binary(),
}

_TYPE_EXPRESSION_R_MAPPING: Dict[pa.DataType, str] = {
Expand All @@ -61,6 +64,7 @@
pa.float64(): "double",
pa.date32(): "date",
TRIAD_DEFAULT_TIMESTAMP: "datetime",
pa.binary(): "bytes",
}


Expand Down Expand Up @@ -442,6 +446,14 @@ def _to_pydate(obj: Any) -> Any:
return None if obj != obj else obj


def _to_pybytes(obj: Any) -> Any:
    """Convert ``obj`` into a value suitable for a binary column.

    :param obj: the value to convert
    :return: ``None`` and ``bytes`` objects are returned unchanged; a
      ``bytearray`` or ``memoryview`` is copied into a new ``bytes`` object;
      any other object is serialized with ``pickle.dumps``
    """
    if obj is None or isinstance(obj, bytes):
        return obj
    # Buffer objects already hold raw binary content, so copy it out rather
    # than pickling it (pickle cannot serialize a memoryview at all).
    if isinstance(obj, (bytearray, memoryview)):
        return bytes(obj)
    return pickle.dumps(obj)


def _assert_pytype(pytype: type, obj: Any) -> Any:
if obj is None or isinstance(obj, pytype):
return obj
Expand Down Expand Up @@ -504,6 +516,7 @@ class _TypeConverter(object):
pa.float32(): _to_pyfloat,
pa.float64(): _to_pyfloat,
pa.date32(): _to_pydate,
pa.binary(): _to_pybytes,
}

def __init__(
Expand Down

0 comments on commit 12ad4ed

Please sign in to comment.