Skip to content

Commit

Permalink
implement Time datatype
Browse files Browse the repository at this point in the history
implement BetweenTime node
closes #1098
  • Loading branch information
jreback committed Aug 10, 2017
1 parent d98b476 commit 247d31c
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 12 deletions.
35 changes: 32 additions & 3 deletions ibis/expr/api.py
Expand Up @@ -37,7 +37,7 @@
StringValue, StringScalar, StringColumn,
DecimalValue, DecimalScalar, DecimalColumn,
TimestampValue, TimestampScalar, TimestampColumn,
DateValue,
DateValue, TimeValue,
ArrayValue, ArrayScalar, ArrayColumn,
CategoryValue, unnamed, as_value_expr, literal,
null, sequence)
Expand Down Expand Up @@ -643,7 +643,11 @@ def between(arg, lower, upper):
"""
lower = _ops.as_value_expr(lower)
upper = _ops.as_value_expr(upper)
op = _ops.Between(arg, lower, upper)

if isinstance(arg.op(), _ops.Time):
op = _ops.BetweenTime(arg.op().args[0], lower, upper)
else:
op = _ops.Between(arg, lower, upper)
return op.to_expr()


Expand Down Expand Up @@ -1774,6 +1778,20 @@ def _timestamp_strftime(arg, format_str):
return _ops.Strftime(arg, format_str).to_expr()


def _timestamp_time(arg):
    """
    Build a Time expression from a timestamp expression.

    The timestamp is only wrapped in a ``Time`` operation node. Certain
    operations can then be performed on that node without actually
    instantiating the underlying time values (which is inefficient in
    pandas/numpy).

    Parameters
    ----------
    arg : timestamp expression

    Returns
    -------
    Time node
    """
    time_op = _ops.Time(arg)
    return time_op.to_expr()


_timestamp_value_methods = dict(
strftime=_timestamp_strftime,
year=_extract_field('year', _ops.ExtractYear),
Expand All @@ -1783,7 +1801,8 @@ def _timestamp_strftime(arg, format_str):
minute=_extract_field('minute', _ops.ExtractMinute),
second=_extract_field('second', _ops.ExtractSecond),
millisecond=_extract_field('millisecond', _ops.ExtractMillisecond),
truncate=_timestamp_truncate
truncate=_timestamp_truncate,
time=_timestamp_time,
)


Expand All @@ -1799,6 +1818,16 @@ def _timestamp_strftime(arg, format_str):
_add_methods(DateValue, _date_value_methods)


# ---------------------------------------------------------------------
# Time API

# Method table for TimeValue: maps the method name to its implementation.
# ``between`` is the module-level ``between`` function, which builds a
# ``BetweenTime`` op when its argument comes from a ``Time`` op.
_time_value_methods = dict(
    between=between,
)

# Hand the table to ``_add_methods`` together with the TimeValue class.
_add_methods(TimeValue, _time_value_methods)


# ---------------------------------------------------------------------
# Decimal API

Expand Down
21 changes: 20 additions & 1 deletion ibis/expr/datatypes.py
Expand Up @@ -308,6 +308,12 @@ def valid_literal(self, value):
return isinstance(value, six.string_types + (datetime.date,))


class Time(Primitive):
    """Primitive data type for time values."""

    def valid_literal(self, value):
        """Return True if *value* is a string or a ``datetime.time``."""
        accepted_types = six.string_types + (datetime.time,)
        return isinstance(value, accepted_types)


def parametric(cls):
type_name = cls.__name__
array_type_name = '{0}Column'.format(type_name)
Expand Down Expand Up @@ -596,6 +602,7 @@ def _equal_part(self, other, cache=None):
double = Double()
string = String()
date = Date()
time = Time()
timestamp = Timestamp()


Expand All @@ -611,6 +618,7 @@ def _equal_part(self, other, cache=None):
'double': double,
'string': string,
'date': date,
'time': time,
'timestamp': timestamp
}

Expand Down Expand Up @@ -639,6 +647,7 @@ class Tokens(object):
RBRACKET = 16
TIMEZONE = 17
TIMESTAMP = 18
TIME = 19

@staticmethod
def name(value):
Expand Down Expand Up @@ -669,13 +678,19 @@ def name(value):
'(?P<{}>{})'.format(token.upper(), token),
lambda token, value=value: Token(Tokens.PRIMITIVE, value)
) for token, value in _primitive_types.items()
if token not in {'any', 'null', 'timestamp'}
if token not in {'any', 'null', 'timestamp', 'time'}
] + [
# timestamp
(
r'(?P<TIMESTAMP>timestamp)',
lambda token: Token(Tokens.TIMESTAMP, token),
),
] + [
# time
(
r'(?P<TIME>time)',
lambda token: Token(Tokens.TIME, token),
),
] + [
# decimal + complex types
(
Expand Down Expand Up @@ -814,6 +829,7 @@ def type(self):
| "float"
| "double"
| "string"
| "time"
| timestamp
timestamp : "timestamp"
Expand Down Expand Up @@ -843,6 +859,9 @@ def type(self):
return Timestamp(timezone=timezone)
return timestamp

elif self._accept(Tokens.TIME):
return Time()

elif self._accept(Tokens.DECIMAL):
if self._accept(Tokens.LPAREN):

Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/operations.py
Expand Up @@ -2119,6 +2119,10 @@ def _assert_can_compare(self):
raise TypeError('Arguments are not comparable')


class BetweenTime(Between):
    """``Between`` variant for time comparisons.

    Adds no behaviour of its own; it exists as a distinct node type so that
    it can be dispatched on separately from a plain ``Between``.
    """
    pass


class Contains(BooleanValueOp):

def __init__(self, value, options):
Expand Down Expand Up @@ -2363,6 +2367,11 @@ class ExtractMillisecond(ExtractTimestampField):
pass


class Time(UnaryOp):
    """Unary operation producing a value of the 'time' type."""

    # Result type is 'time'; judging by the rule's name the result takes the
    # shape (scalar vs. column) of argument 0 -- TODO confirm against rules.
    output_type = rules.shape_like_arg(0, 'time')


class TimestampFromUNIX(ValueOp):

input_type = [value, rules.string_options(['s', 'ms', 'us'], name='unit')]
Expand Down
6 changes: 5 additions & 1 deletion ibis/expr/rules.py
Expand Up @@ -628,6 +628,10 @@ def date(**arg_kwds):
return ValueTyped(ir.DateValue, 'not date', **arg_kwds)


def time(**arg_kwds):
    """Build a ``ValueTyped`` rule for ``ir.TimeValue`` arguments.

    All keyword arguments are forwarded unchanged to ``ValueTyped``.
    """
    message = 'not time'
    return ValueTyped(ir.TimeValue, message, **arg_kwds)


def timedelta(**arg_kwds):
from ibis.expr.temporal import Timedelta
return AnyTyped(Timedelta, 'not a timedelta', **arg_kwds)
Expand All @@ -653,7 +657,7 @@ def one_of(args, **arg_kwds):
return OneOf(args, **arg_kwds)


temporal = one_of((dt.timestamp, dt.date))
temporal = one_of((dt.timestamp, dt.date, dt.time))


def instance_of(type_, **arg_kwds):
Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/tests/test_datatypes.py
Expand Up @@ -326,3 +326,12 @@ def test_timestamp_with_timezone_repr():
def test_timestamp_with_timezone_str():
ts = dt.Timestamp('UTC')
assert str(ts) == "timestamp('UTC')"


def test_time():
    """The ``dt.time`` datatype renders as the string "time"."""
    assert str(dt.time) == "time"


def test_time_valid():
    """Validating the type string 'time' yields the ``dt.time`` datatype."""
    validated = dt.validate_type('time')
    assert validated.equals(dt.time)
40 changes: 34 additions & 6 deletions ibis/expr/types.py
Expand Up @@ -988,7 +988,8 @@ def constructor(arg, name=None):


class TemporalValue(AnyValue):
    """Common base class for the temporal value expressions."""

    def _can_compare(self, other):
        """Return True when *other* is a temporal or a string value."""
        comparable_types = (TemporalValue, StringValue)
        return isinstance(other, comparable_types)


class DateValue(TemporalValue):
Expand All @@ -1007,13 +1008,35 @@ def _can_implicit_cast(self, arg):
return False
return False

def _implicit_cast(self, arg):
# assume we've checked this is OK at this point...
op = arg.op()
return DateScalar(op)


class TimeValue(TemporalValue):
    """Value expression of the ``time`` datatype."""

    def type(self):
        """Return the ``time`` datatype singleton."""
        return dt.time

    def _can_implicit_cast(self, arg):
        """Return True if *arg* is a literal that pandas can parse as a time.

        Only ``Literal`` operations are considered; any other expression is
        reported as not implicitly castable.
        """
        op = arg.op()
        if not isinstance(op, Literal):
            return False

        # ``to_time`` is not public pandas API and has lived in different
        # modules across pandas releases; try the newer location first and
        # fall back to the one this code was originally written against.
        try:
            from pandas.core.tools.times import to_time
        except ImportError:
            from pandas.core.tools.datetimes import to_time

        try:
            to_time(op.value)
        except (ValueError, TypeError):
            # An unparseable value means "cannot cast", not a crash in what
            # is only a capability probe.
            return False
        return True

    def _can_compare(self, other):
        """Return True when *other* is a time or a string value."""
        return isinstance(other, (TimeValue, StringValue))

    def _implicit_cast(self, arg):
        """Wrap the operation behind *arg* in a ``TimeScalar``."""
        # assume we've checked this is OK at this point...
        return TimeScalar(arg.op())


class TimestampValue(TemporalValue):
Expand Down Expand Up @@ -1049,9 +1072,6 @@ def _can_implicit_cast(self, arg):
return False
return False

def _can_compare(self, other):
return isinstance(other, (TemporalValue, StringValue))

def _implicit_cast(self, arg):
# assume we've checked this is OK at this point...
op = arg.op()
Expand Down Expand Up @@ -1158,6 +1178,14 @@ class DateColumn(ColumnExpr, DateValue):
pass


class TimeScalar(ScalarExpr, TimeValue):
    """Scalar expression of the time type (``ScalarExpr`` + ``TimeValue``)."""
    pass


class TimeColumn(ColumnExpr, TimeValue):
    """Column expression of the time type (``ColumnExpr`` + ``TimeValue``)."""
    pass


class TimestampScalar(ScalarExpr, TimestampValue):

def __init__(self, arg, meta=None, name=None):
Expand Down
2 changes: 1 addition & 1 deletion ibis/pandas/core.py
Expand Up @@ -20,7 +20,7 @@
boolean_types = bool, np.bool_
fixed_width_types = numeric_types + boolean_types
temporal_types = (
datetime.datetime, datetime.date, datetime.timedelta,
datetime.datetime, datetime.date, datetime.timedelta, datetime.time,
np.datetime64, np.timedelta64,
)
scalar_types = fixed_width_types + temporal_types
Expand Down
14 changes: 14 additions & 0 deletions ibis/pandas/execution.py
Expand Up @@ -768,6 +768,20 @@ def execute_between(op, data, lower, upper, scope=None):
return data.between(lower, upper)


@execute_node.register(
    ops.BetweenTime,
    pd.Series,
    (pd.Series, str, datetime.time),
    (pd.Series, str, datetime.time),
)
def execute_between_time(op, data, lower, upper, scope=None):
    """Execute a ``BetweenTime`` node against a pandas Series.

    ``data`` is converted to a ``DatetimeIndex`` and pandas is asked for the
    positions whose time lies between ``lower`` and ``upper``.

    Returns
    -------
    numpy.ndarray
        Boolean array with one entry per element of ``data``; True at the
        positions reported by ``indexer_between_time``.
    """
    as_index = pd.DatetimeIndex(data)
    matching_positions = as_index.indexer_between_time(lower, upper)

    # Start from an all-False mask and switch on the matching positions.
    mask = np.zeros(len(data), dtype=np.bool_)
    mask[matching_positions] = True
    return mask


@execute_node.register(ops.DistinctColumn, pd.Series)
def execute_series_distinct(op, data, scope=None):
return pd.Series(data.unique(), name=data.name)
Expand Down
10 changes: 10 additions & 0 deletions ibis/pandas/tests/test_operations.py
Expand Up @@ -178,6 +178,16 @@ def test_timestamp_functions(case_func, expected_func):
assert ibis.pandas.execute(result) == expected


def test_times_ops(t, df):
    """Check ``time().between`` on the naive datetime column.

    The '10:00'-'10:00' window is expected to match no row and the
    '01:00'-'02:00' window is expected to match every row.
    """
    n_rows = len(df)

    no_match = t.plain_datetimes_naive.time().between('10:00', '10:00')
    tm.assert_numpy_array_equal(
        no_match.execute(),
        np.zeros(n_rows, dtype=bool),
    )

    all_match = t.plain_datetimes_naive.time().between('01:00', '02:00')
    tm.assert_numpy_array_equal(
        all_match.execute(),
        np.ones(n_rows, dtype=bool),
    )


@pytest.mark.parametrize(
'op',
[
Expand Down

0 comments on commit 247d31c

Please sign in to comment.