Skip to content

Commit

Permalink
Merge pull request #1533 from stefanseefeld/dt-namespace
Browse files Browse the repository at this point in the history
Add datetime functions to 'dt' namespace.
  • Loading branch information
kwmsmith committed Jul 12, 2016
2 parents d78137f + 25f8512 commit 8d235c3
Show file tree
Hide file tree
Showing 11 changed files with 358 additions and 26 deletions.
32 changes: 32 additions & 0 deletions blaze/compute/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
BinOp,
Broadcast,
By,
Ceil,
Coalesce,
Coerce,
Concat,
Expand All @@ -66,6 +67,7 @@
ElemWise,
Expr,
Field,
Floor,
Head,
Interp,
IsIn,
Expand All @@ -80,11 +82,14 @@
ReLabel,
Reduction,
Replace,
Round,
Sample,
seconds,
Selection,
Shift,
Slice,
Sort,
strftime,
Summary,
Tail,
UTCFromTimestamp,
Expand All @@ -104,6 +109,7 @@
StrFind,
StrSlice,
SliceReplace,
total_seconds,
)

__all__ = []
Expand Down Expand Up @@ -715,6 +721,12 @@ def compute_up(expr, s, **kwargs):
return get_date_attr(s, expr.attr, expr._name)


@dispatch(total_seconds, Series)
def compute_up(expr, s, **kwargs):
    """Compute a ``total_seconds`` expression against a pandas Series.

    The values come from the Series' ``.dt.total_seconds()`` accessor call;
    the resulting Series is labelled with ``expr._name`` before it is
    returned.
    """
    seconds_total = s.dt.total_seconds()
    seconds_total.name = expr._name
    return seconds_total

@dispatch(UTCFromTimestamp, Series)
def compute_up(expr, s, **kwargs):
    """Convert a Series of numeric timestamps into UTC datetimes.

    The input is multiplied by ``1e9`` before conversion (presumably
    seconds -> nanoseconds, the default unit of ``to_datetime`` for numeric
    input -- confirm against callers), and the result is produced with
    ``utc=True``.
    """
    # Use the public top-level ``pd.to_datetime``.  ``pd.datetools`` was only
    # a re-export shim for the same function; it is deprecated and absent
    # from newer pandas releases, where the old spelling raises
    # AttributeError.
    return pd.to_datetime(s * 1e9, utc=True)
Expand All @@ -726,6 +738,26 @@ def compute_up(expr, s, **kwargs):
'%s_millisecond' % expr._child._name) // 1000


@dispatch(Round, Series)
def compute_up(expr, data, **kwargs):
    """Apply ``Series.dt.round`` to ``data`` using the frequency ``expr.freq``."""
    rounder = data.dt.round
    return rounder(expr.freq)


@dispatch(Ceil, Series)
def compute_up(expr, data, **kwargs):
    """Apply ``Series.dt.ceil`` to ``data`` using the frequency ``expr.freq``."""
    ceiler = data.dt.ceil
    return ceiler(expr.freq)


@dispatch(Floor, Series)
def compute_up(expr, data, **kwargs):
    """Apply ``Series.dt.floor`` to ``data`` using the frequency ``expr.freq``."""
    floorer = data.dt.floor
    return floorer(expr.freq)


@dispatch(strftime, Series)
def compute_up(expr, data, **kwargs):
    """Format ``data`` with ``Series.dt.strftime`` using ``expr.format``."""
    formatter = data.dt.strftime
    return formatter(expr.format)


@dispatch(Slice, (DataFrame, Series))
def compute_up(expr, df, **kwargs):
index = expr.index
Expand Down
64 changes: 63 additions & 1 deletion blaze/compute/tests/test_pandas_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from datetime import datetime, timedelta
from datetime import datetime, timedelta, date, time

import numpy as np

Expand Down Expand Up @@ -850,6 +850,68 @@ def test_datetime_access():
Series([1, 1, 1], name=expr._name))


@pytest.mark.parametrize(
    'attr, args, expected',
    [
        ('date', [], date(2016, 6, 5)),
        ('year', [], 2016),
        ('month', [], 6),
        ('day', [], 5),
        ('time', [], time(13, 32, 1)),
        ('hour', [], 13),
        ('minute', [], 32),
        ('second', [], 1),
        ('millisecond', [], 0),
        ('microsecond', [], 0),
        ('nanosecond', [], 0),
        ('week', [], 22),
        ('weekday', [], 6),
        ('weekday_name', [], 'Sunday'),
        ('daysinmonth', [], 30),
        ('weekofyear', [], 22),
        ('dayofyear', [], 157),
        ('dayofweek', [], 6),
        ('quarter', [], 2),
        ('is_month_start', [], False),
        ('is_month_end', [], False),
        ('is_quarter_start', [], False),
        ('is_quarter_end', [], False),
        ('is_year_start', [], False),
        ('is_year_end', [], False),
        ('days_in_month', [], 30),
        ('strftime', ['%Y-%m-%d'], '2016-06-05'),
    ],
)
def test_dt_namespace(attr, args, expected):
    """Each ``dt`` namespace method computes the expected value on pandas.

    A single-row frame holding 2016-06-05 13:32:01 is used; the method named
    ``attr`` is looked up on ``t.when.dt`` and called with ``args``.
    """
    frame = DataFrame({'when': [datetime(2016, 6, 5, 13, 32, 1)]})
    tbl = symbol('t', 'var * {when: datetime}')
    accessor = getattr(tbl.when.dt, attr)
    expr = accessor(*args)
    computed = compute(expr, frame)
    assert_series_equal(computed, Series(expected, name=expr._name))


@pytest.mark.parametrize(
    'attr, args, expected',
    [
        ('days', [], 7),
        ('nanoseconds', [], 0),
        ('seconds', [], 0),
        ('total_seconds', [], 604800.),
    ],
)
def test_td_namespace(attr, args, expected):
    """Timedelta methods on the ``dt`` namespace compute as expected.

    A single-row frame holding a seven-day timedelta is used; the method
    named ``attr`` is looked up on ``t.span.dt`` and called with ``args``.
    """
    frame = DataFrame({'span': [timedelta(7)]})
    tbl = symbol('t', 'var * {span: timedelta}')
    accessor = getattr(tbl.span.dt, attr)
    expr = accessor(*args)
    computed = compute(expr, frame)
    assert_series_equal(computed, Series(expected, name=expr._name))


@pytest.mark.parametrize(
    'op, freq, expected',
    [
        ('round', 's', datetime(2016, 6, 5, 13, 32, 1)),
        ('round', 'h', datetime(2016, 6, 5, 14, 0, 0)),
        ('floor', 'h', datetime(2016, 6, 5, 13, 0, 0)),
        ('ceil', 'h', datetime(2016, 6, 5, 14, 0, 0)),
    ],
)
def test_dt_round(op, freq, expected):
    """``round``/``floor``/``ceil`` on the ``dt`` namespace honour ``freq``.

    The method named ``op`` is looked up on ``t.when.dt`` and called with
    ``freq``; the input row is 2016-06-05 13:32:01.
    """
    frame = DataFrame({'when': [datetime(2016, 6, 5, 13, 32, 1)]})
    tbl = symbol('t', 'var * {when: datetime}')
    rounding = getattr(tbl.when.dt, op)
    expr = rounding(freq)
    computed = compute(expr, frame)
    assert_series_equal(computed, Series(expected, name=expr._name))


def test_frame_slice():
assert_series_equal(compute(t[0], df), df.iloc[0])
assert_series_equal(compute(t[2], df), df.iloc[2])
Expand Down
186 changes: 180 additions & 6 deletions blaze/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@

import datashape
from datashape import dshape, isdatelike, isnumeric
from datashape.coretypes import timedelta_
from ..compatibility import basestring


__all__ = ['DateTime', 'Date', 'date', 'Year', 'year', 'Month', 'month', 'Day',
'day', 'Hour', 'hour', 'Minute', 'minute', 'Second', 'second',
'Millisecond', 'millisecond', 'Microsecond', 'microsecond', 'Date',
'date', 'Time', 'time', 'UTCFromTimestamp', 'DateTimeTruncate']
'day', 'days', 'Hour', 'hour', 'Minute', 'minute', 'Second', 'second',
'Millisecond', 'millisecond', 'Microsecond', 'microsecond', 'nanosecond',
'Date',
'date', 'Time', 'time', 'week', 'nanoseconds', 'seconds', 'total_seconds',
'UTCFromTimestamp', 'DateTimeTruncate',
'Ceil', 'Floor', 'Round', 'strftime']


def _validate(var, name, type, typename):
    """Raise ``TypeError`` unless ``var`` is an instance of ``type``.

    Parameters
    ----------
    var : object
        Value being checked.
    name : str
        Argument name quoted in the error message.
    type : type or tuple of types
        Accepted type(s), passed straight to ``isinstance``.
    typename : str
        Human-readable type description used in the error message.

    Raises
    ------
    TypeError
        When ``var`` is not an instance of ``type``.
    """
    if isinstance(var, type):
        return
    raise TypeError('"%s" argument must be a %s' % (name, typename))


class DateTime(ElemWise):
Expand All @@ -20,7 +30,8 @@ def __str__(self):
return '%s.%s' % (str(self._child), type(self).__name__.lower())

def _schema(self):
return dshape(self._dtype)
ds = dshape(self._dtype)
return ds if not isinstance(self._child.schema.measure, datashape.Option) else datashape.Option(ds)

@property
def _name(self):
Expand Down Expand Up @@ -119,6 +130,27 @@ def utcfromtimestamp(expr):
return UTCFromTimestamp(expr)


class nanosecond(DateTime):
    """Expression node for the ``nanosecond`` datetime attribute (int64)."""

    _dtype = datashape.int64


class week(DateTime):
    """Expression node for the ``week`` datetime attribute (int64)."""

    _dtype = datashape.int64


class weekday(DateTime):
    """Expression node for the ``weekday`` datetime attribute (int64)."""

    _dtype = datashape.int64


class weekday_name(DateTime):
    """Expression node for the ``weekday_name`` datetime attribute (string)."""

    _dtype = datashape.string


class daysinmonth(DateTime):
    """Expression node for the ``daysinmonth`` datetime attribute (int64)."""

    _dtype = datashape.int64


class weekofyear(DateTime):
    """Expression node for the ``weekofyear`` datetime attribute (int64)."""

    _dtype = datashape.int64


class dayofyear(DateTime):
    """Expression node for the ``dayofyear`` datetime attribute (int64)."""

    _dtype = datashape.int64


class dayofweek(DateTime):
    """Expression node for the ``dayofweek`` datetime attribute (int64)."""

    _dtype = datashape.int64


class quarter(DateTime):
    """Expression node for the ``quarter`` datetime attribute (int64)."""

    _dtype = datashape.int64


class is_month_start(DateTime):
    """Expression node for the ``is_month_start`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class is_month_end(DateTime):
    """Expression node for the ``is_month_end`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class is_quarter_start(DateTime):
    """Expression node for the ``is_quarter_start`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class is_quarter_end(DateTime):
    """Expression node for the ``is_quarter_end`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class is_year_start(DateTime):
    """Expression node for the ``is_year_start`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class is_year_end(DateTime):
    """Expression node for the ``is_year_end`` datetime attribute (bool)."""

    _dtype = datashape.bool_


class days_in_month(DateTime):
    """Expression node for the ``days_in_month`` datetime attribute (int64)."""

    _dtype = datashape.int64

class strftime(ElemWise):
    """Element-wise node pairing a child expression with a ``format`` argument.

    The schema is fixed to ``datashape.string`` regardless of the child.
    """

    _arguments = ('_child', 'format')
    schema = datashape.string

units = (
'year',
'month',
Expand Down Expand Up @@ -222,10 +254,152 @@ def truncate(expr, *args, **kwargs):
return DateTimeTruncate(expr, measure, normalize_time_unit(unit))


class UnaryDateTimeFunction(ElemWise):
    """DateTime function that only takes a single argument."""

    # A one-element tuple, not a bare string: the other nodes in this module
    # declare ``_arguments`` as a tuple of argument names, and a plain
    # ``'_child'`` string would be iterated character by character by any
    # code that treats it as such a sequence.
    _arguments = ('_child',)


class Round(ElemWise):
    """Element-wise node holding a child expression and a ``freq`` argument.

    The pandas backend evaluates it with ``Series.dt.round(freq)``.
    """

    _arguments = ('_child', 'freq')

    @property
    def schema(self):
        """Schema of the child expression, passed through unchanged."""
        child = self._child
        return child.schema


class Floor(ElemWise):
    """Element-wise node holding a child expression and a ``freq`` argument.

    The pandas backend evaluates it with ``Series.dt.floor(freq)``.
    """

    _arguments = ('_child', 'freq')

    @property
    def schema(self):
        """Schema of the child expression, passed through unchanged."""
        child = self._child
        return child.schema


class Ceil(ElemWise):
    """Element-wise node holding a child expression and a ``freq`` argument.

    The pandas backend evaluates it with ``Series.dt.ceil(freq)``.
    """

    _arguments = ('_child', 'freq')

    @property
    def schema(self):
        """Schema of the child expression, passed through unchanged."""
        child = self._child
        return child.schema


class dt_ns(object):
    """Namespace object handed out by the ``dt`` descriptor.

    It wraps a single expression (``field``).  Each method builds the
    expression node of the same name around that field, e.g. ``ns.year()``
    returns ``year(ns.field)``.
    """

    def __init__(self, field):
        # Expression every accessor method below operates on.
        self.field = field

    # -- date / time parts ---------------------------------------------

    def date(self):
        """Build a ``date`` expression over the wrapped field."""
        return date(self.field)

    def time(self):
        """Build a ``time`` expression over the wrapped field."""
        return time(self.field)

    def year(self):
        """Build a ``year`` expression over the wrapped field."""
        return year(self.field)

    def quarter(self):
        """Build a ``quarter`` expression over the wrapped field."""
        return quarter(self.field)

    def month(self):
        """Build a ``month`` expression over the wrapped field."""
        return month(self.field)

    def week(self):
        """Build a ``week`` expression over the wrapped field."""
        return week(self.field)

    def weekofyear(self):
        """Build a ``weekofyear`` expression over the wrapped field."""
        return weekofyear(self.field)

    def day(self):
        """Build a ``day`` expression over the wrapped field."""
        return day(self.field)

    def dayofyear(self):
        """Build a ``dayofyear`` expression over the wrapped field."""
        return dayofyear(self.field)

    def dayofweek(self):
        """Build a ``dayofweek`` expression over the wrapped field."""
        return dayofweek(self.field)

    def weekday(self):
        """Build a ``weekday`` expression over the wrapped field."""
        return weekday(self.field)

    def weekday_name(self):
        """Build a ``weekday_name`` expression over the wrapped field."""
        return weekday_name(self.field)

    def daysinmonth(self):
        """Build a ``daysinmonth`` expression over the wrapped field."""
        return daysinmonth(self.field)

    def days_in_month(self):
        """Build a ``days_in_month`` expression over the wrapped field."""
        return days_in_month(self.field)

    def hour(self):
        """Build an ``hour`` expression over the wrapped field."""
        return hour(self.field)

    def minute(self):
        """Build a ``minute`` expression over the wrapped field."""
        return minute(self.field)

    def second(self):
        """Build a ``second`` expression over the wrapped field."""
        return second(self.field)

    def millisecond(self):
        """Build a ``millisecond`` expression over the wrapped field."""
        return millisecond(self.field)

    def microsecond(self):
        """Build a ``microsecond`` expression over the wrapped field."""
        return microsecond(self.field)

    def nanosecond(self):
        """Build a ``nanosecond`` expression over the wrapped field."""
        return nanosecond(self.field)

    # -- boundary flags --------------------------------------------------

    def is_month_start(self):
        """Build an ``is_month_start`` expression over the wrapped field."""
        return is_month_start(self.field)

    def is_month_end(self):
        """Build an ``is_month_end`` expression over the wrapped field."""
        return is_month_end(self.field)

    def is_quarter_start(self):
        """Build an ``is_quarter_start`` expression over the wrapped field."""
        return is_quarter_start(self.field)

    def is_quarter_end(self):
        """Build an ``is_quarter_end`` expression over the wrapped field."""
        return is_quarter_end(self.field)

    def is_year_start(self):
        """Build an ``is_year_start`` expression over the wrapped field."""
        return is_year_start(self.field)

    def is_year_end(self):
        """Build an ``is_year_end`` expression over the wrapped field."""
        return is_year_end(self.field)

    # -- methods taking arguments ---------------------------------------

    def strftime(self, format):
        """Build a ``strftime`` node; ``format`` must be a string.

        Raises ``TypeError`` (via ``_validate``) for a non-string ``format``.
        """
        _validate(format, 'format', basestring, 'string')
        return strftime(self.field, format)

    def truncate(self, *args, **kwargs):
        """Forward to the module-level ``truncate`` with the wrapped field first."""
        return truncate(self.field, *args, **kwargs)

    def round(self, freq):
        """Build a ``Round`` node; ``freq`` must be a string.

        Raises ``TypeError`` (via ``_validate``) for a non-string ``freq``.
        """
        _validate(freq, 'freq', basestring, 'string')
        return Round(self.field, freq)

    def floor(self, freq):
        """Build a ``Floor`` node; ``freq`` must be a string.

        Raises ``TypeError`` (via ``_validate``) for a non-string ``freq``.
        """
        _validate(freq, 'freq', basestring, 'string')
        return Floor(self.field, freq)

    def ceil(self, freq):
        """Build a ``Ceil`` node; ``freq`` must be a string.

        Raises ``TypeError`` (via ``_validate``) for a non-string ``freq``.
        """
        _validate(freq, 'freq', basestring, 'string')
        return Ceil(self.field, freq)

class dt(object):
    """Descriptor that exposes the datetime namespace under the name ``dt``.

    Accessed through an instance it returns a ``dt_ns`` wrapping that
    instance; accessed through the owning class it returns the descriptor
    itself.
    """

    __name__ = 'dt'

    def __get__(self, obj, type=None):
        if obj is None:
            # Class-level access: hand back the descriptor unchanged.
            return self
        return dt_ns(obj)


class days(DateTime):
    """Expression node for the ``days`` timedelta attribute (int64)."""

    _dtype = datashape.int64


class nanoseconds(DateTime):
    """Expression node for the ``nanoseconds`` timedelta attribute (int64)."""

    _dtype = datashape.int64


class seconds(DateTime):
    """Expression node for the ``seconds`` timedelta attribute (int64)."""

    _dtype = datashape.int64
class total_seconds(DateTime):
    """Expression node for the ``total_seconds`` timedelta method.

    The pandas backend evaluates this with ``Series.dt.total_seconds()``.
    """

    # Declared as float64 rather than int64: pandas returns the duration as
    # floating-point seconds (sub-second durations have a fractional part),
    # so an integer measure would misdescribe the computed result.
    _dtype = datashape.float64


class timedelta_ns(object):
    """Namespace object handed out by the ``timedelta`` descriptor.

    It wraps a single expression (``field``); each method builds the
    expression node of the same name around that field.
    """

    def __init__(self, field):
        # Expression every accessor method below operates on.
        self.field = field

    def days(self):
        """Build a ``days`` expression over the wrapped field."""
        return days(self.field)

    def nanoseconds(self):
        """Build a ``nanoseconds`` expression over the wrapped field."""
        return nanoseconds(self.field)

    def seconds(self):
        """Build a ``seconds`` expression over the wrapped field."""
        return seconds(self.field)

    def total_seconds(self):
        """Build a ``total_seconds`` expression over the wrapped field."""
        return total_seconds(self.field)


class timedelta(object):
    """Descriptor that exposes the timedelta namespace under the name ``dt``.

    Accessed through an instance it returns a ``timedelta_ns`` wrapping that
    instance; accessed through the owning class it returns the descriptor
    itself.
    """

    # The namespace is deliberately published as 'dt': pandas likewise uses
    # one 'dt' name for both DateTimeProperties and TimedeltaProperties.
    __name__ = 'dt'

    def __get__(self, obj, type=None):
        if obj is None:
            # Class-level access: hand back the descriptor unchanged.
            return self
        return timedelta_ns(obj)


def isdeltalike(ds):
    """Return the result of comparing ``ds`` for equality with ``timedelta_``.

    Used as the predicate that decides whether the timedelta namespace is
    attached to an expression.
    """
    matches_timedelta = ds == timedelta_
    return matches_timedelta

schema_method_list.extend([
(isdatelike, set([year, month, day, hour, minute, date, time, second,
millisecond, microsecond, truncate])),
(isnumeric, set([utcfromtimestamp]))
millisecond, microsecond, truncate,
dt()])),
(isnumeric, set([utcfromtimestamp])),
(isdeltalike, set([timedelta()]))
])

method_properties |= set([year, month, day, hour, minute, second, millisecond,
Expand Down

0 comments on commit 8d235c3

Please sign in to comment.