Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: fix isin for server #1255

Merged
merged 3 commits into from Oct 13, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions blaze/server/serialization.py
Expand Up @@ -5,7 +5,7 @@
import pandas.msgpack as msgpack_module

from ..compatibility import pickle as pickle_module, unicode
from ..utils import json_dumps
from ..utils import json_dumps, object_hook


SerializationFormat = namedtuple('SerializationFormat', 'name loads dumps')
Expand All @@ -19,7 +19,7 @@ def _coerce_str(bytes_or_str):

# JSON wire format, packed as SerializationFormat(name, loads, dumps).
# NOTE(review): this hunk is a diff view with the +/- markers stripped; the
# two ``loads`` lambdas below look like the before/after versions of a single
# argument, not two separate arguments — confirm against the repository.
json = SerializationFormat(
'json',
# loads without an object hook — presumably the removed line
lambda data: json_module.loads(_coerce_str(data)),
# loads that passes every decoded JSON object through ``object_hook``
lambda data: json_module.loads(_coerce_str(data), object_hook=object_hook),
# dumps: ``json_dumps`` is the fallback for values json cannot encode itself
partial(json_module.dumps, default=json_dumps),
)
pickle = SerializationFormat(
Expand All @@ -29,7 +29,7 @@ def _coerce_str(bytes_or_str):
)
# msgpack wire format, packed as SerializationFormat(name, loads, dumps).
# NOTE(review): this hunk is a diff view with the +/- markers stripped; the
# two ``unpackb`` partials below look like the before/after versions of a
# single argument — confirm against the repository.
msgpack = SerializationFormat(
'msgpack',
# unpack without an object hook — presumably the removed line
partial(msgpack_module.unpackb, encoding='utf-8'),
# unpack that passes every decoded map through ``object_hook``
partial(msgpack_module.unpackb, encoding='utf-8', object_hook=object_hook),
# pack: ``json_dumps`` is the fallback for values msgpack cannot encode itself
partial(msgpack_module.packb, default=json_dumps),
)

Expand Down
18 changes: 18 additions & 0 deletions blaze/server/tests/test_server.py
Expand Up @@ -486,3 +486,21 @@ def test_minute_query(test, serial):
}
assert result.status_code == 200
assert expected == serial.loads(result.data)


@pytest.mark.parametrize('serial', all_formats)
def test_isin(test, serial):
    """POST an ``isin`` expression to ``/compute`` in every wire format.

    The expression carries a ``frozenset``, so the request body can only be
    built if ``serial.dumps`` knows how to encode one.
    """
    payload = serial.dumps(
        {'expr': to_tree(t.events.value.isin(frozenset([1])))}
    )
    response = test.post(
        '/compute',
        headers=mimetype(serial),
        data=payload,
    )
    expected = {
        'data': [True, False],
        'names': ['value'],
        'datashape': '2 * bool',
    }
    assert response.status_code == 200
    assert expected == serial.loads(response.data)
31 changes: 15 additions & 16 deletions blaze/tests/test_utils.py
@@ -1,19 +1,12 @@
import sys
from datetime import datetime, timedelta
import os
import json

import pytest

from datetime import datetime

import numpy as np
import pandas as pd
import h5py

from datashape import dshape
import pytest
from pytz import utc

from blaze import discover
from blaze.utils import tmpfile, json_dumps
from blaze.utils import tmpfile, json_dumps, object_hook


def test_tmpfile():
Expand All @@ -26,8 +19,14 @@ def test_tmpfile():
assert not os.path.exists(f)


# NOTE(review): a second ``test_json_encoder`` is defined right after this one
# in this view; if both lived in one module the later definition would shadow
# this one. Presumably this is the removed side of the diff — it expects a
# datetime to serialize to a bare ISO string — confirm against the repository.
def test_json_encoder():
result = json.dumps([1, datetime(2000, 1, 1, 12, 30, 0)],
default=json_dumps)
assert result == '[1, "2000-01-01T12:30:00Z"]'
assert json.loads(result) == [1, "2000-01-01T12:30:00Z"]
@pytest.mark.parametrize('input_,serialized', [
    (
        [1, datetime(2000, 1, 1, 12, 30, 0, 0, utc)],
        '[1, {"__!datetime": "2000-01-01T12:30:00+00:00"}]',
    ),
    (
        [1, pd.NaT],
        '[1, {"__!datetime": "NaT"}]',
    ),
    (
        [1, frozenset([1, 2, 3])],
        '[1, {"__!frozenset": [1, 2, 3]}]',
    ),
    (
        [1, timedelta(seconds=5)],
        '[1, {"__!timedelta": 5.0}]',
    ),
])
def test_json_encoder(input_, serialized):
    """Check the tagged JSON text for each value and that it decodes back."""
    encoded = json.dumps(input_, default=json_dumps)
    assert encoded == serialized

    # Decoding with ``object_hook`` must give back something equal to the
    # original input, not merely the tagged dicts.
    decoded = json.loads(encoded, object_hook=object_hook)
    assert decoded == input_
78 changes: 74 additions & 4 deletions blaze/utils.py
Expand Up @@ -8,13 +8,16 @@
except ImportError:
from toolz import nth

from toolz.curried.operator import setitem

from itertools import islice
from collections import Iterator
from multiprocessing.pool import ThreadPool

# these are used throughout blaze, don't remove them
from odo.utils import tmpfile, filetext, filetexts, raises, keywords, ignoring

import pandas as pd
import psutil
import numpy as np

Expand Down Expand Up @@ -157,7 +160,74 @@ def listpack(x):

# ``json_dumps`` overload for ``datetime.datetime`` values.
# NOTE(review): this hunk comes from a diff view with the +/- markers and the
# indentation stripped. The first four body lines look like the removed
# implementation (returns a bare ISO string) and the remainder like its
# replacement (returns a tagged dict) — confirm against the repository before
# treating this as one function body.
@dispatch(datetime.datetime)
def json_dumps(dt):
# presumably the old body: ISO string, with 'Z' appended when tzname() is falsy
s = dt.isoformat()
if not dt.tzname():
s += 'Z'
return s
# presumably the new body starts here
if dt is pd.NaT:
# NaT has an isoformat but it is totally invalid.
# This keeps the parsing on the client side simple.
s = 'NaT'
else:
s = dt.isoformat()
if not dt.tzname():
s += 'Z'

# wrap the string in a single-key dict whose key names the type
return {'__!datetime': s}


@dispatch(frozenset)
def json_dumps(ds):
    """Encode a ``frozenset`` as a single-key dict tagged ``__!frozenset``.

    The members are emitted as a list, in the set's own iteration order.
    """
    members = list(ds)
    return {'__!frozenset': members}


@dispatch(datetime.timedelta)
def json_dumps(ds):
    """Encode a ``timedelta`` as a single-key dict tagged ``__!timedelta``.

    The payload is the duration expressed as a number of seconds.
    """
    seconds = ds.total_seconds()
    return {'__!timedelta': seconds}


def object_hook(obj):
    """Convert a json object dict back into a python object.

    This looks for our objects that have encoded richer representations with
    a ``__!{type}`` key. Only dicts holding exactly one key are candidates;
    every other dict is handed back untouched.

    Parameters
    ----------
    obj : dict
        The raw json parsed dictionary.

    Returns
    -------
    parsed : any
        The richer form of the object, or ``obj`` itself when it is not a
        single-key dict whose key starts with ``__!``.

    Raises
    ------
    KeyError
        If the key carries the ``__!`` prefix but no converter has been
        registered for the type name that follows it.

    Notes
    -----
    The types that this reads can be extended with the ``register`` method.
    For example:

    >>> class MyList(list):
    ...     pass
    >>> @object_hook.register('MyList')
    ... def _parse_my_list(obj):
    ...     return MyList(obj)

    Register can also be called as a function like:

    >>> object_hook.register('frozenset', frozenset)
    """
    if len(obj) != 1:
        return obj

    key, = obj.keys()
    prefix = '__!'
    try:
        tagged = key.startswith(prefix)
    except (AttributeError, TypeError):
        # Decoded maps are not guaranteed to have text keys (e.g. ints or
        # bytes); such a key cannot be one of our type tags.
        return obj
    if not tagged:
        return obj

    # Everything after the prefix names the registered converter.
    return object_hook._converters[key[len(prefix):]](obj[key])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you write this as key[len('__!'):]? IMO this clarifies what you're doing here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

def _register(name, converter=None):
    """Register ``converter`` as the reader for ``__!{name}`` objects.

    Parameters
    ----------
    name : str
        The type name that follows the ``__!`` prefix in the encoded key.
    converter : callable, optional
        Called with the encoded payload; its result replaces the dict.
        When omitted, a decorator is returned instead.

    Returns
    -------
    decorator : callable or None
        With only ``name``: a decorator that registers the function it is
        applied to and returns that function unchanged, so the decorated
        name stays bound to the function. With both arguments: ``None``.
    """
    if converter is None:
        def decorator(func):
            object_hook._converters[name] = func
            return func
        return decorator

    object_hook._converters[name] = converter


# Maps a type name (the part after ``__!``) to the callable that rebuilds it.
object_hook._converters = {}
object_hook.register = _register


object_hook.register('datetime', pd.Timestamp)
object_hook.register('frozenset', frozenset)


@object_hook.register('timedelta')
def _read_timedelta(ds):
    """Rebuild a ``datetime.timedelta`` from a number of seconds."""
    # timedelta's positional arguments are (days, seconds, ...).
    return datetime.timedelta(0, ds)
3 changes: 3 additions & 0 deletions docs/source/whatsnew/0.9.0.txt
Expand Up @@ -40,3 +40,6 @@ Bug Fixes
* Fixed a bug that prevented creating a
:class:`~blaze.interactive.InteractiveSymbol` that wrapped ``nan`` if the
dshape was ``datetime``. This now correctly coerces to `NaT` (:issue:`1272`).
* Fixed an issue where blaze client/server could not use ``isin`` expressions
because the ``frozenset`` failed to serialize. This also added support for
rich serialization over JSON for things like datetimes (:issue:`1255`).