This repository has been archived by the owner on Jan 15, 2020. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Ivan Sagalaev
committed
Aug 26, 2010
0 parents
commit 89196ca
Showing
7 changed files
with
169 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,24 @@ | |||
Copyright (c) 2010, Ivan Sagalaev | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are met: | |||
|
|||
* Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
* Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in the | |||
documentation and/or other materials provided with the distribution. | |||
* Neither the name "ijson" nor the names of its contributors | |||
may be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
|
|||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY | |||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY | |||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,3 @@ | |||
include *.txt | |||
|
|||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,32 @@ | |||
===== | |||
ijson | |||
===== | |||
|
|||
Ijson is a Python wrapper for `YAJL <http://lloyd.github.com/yajl/>`_, which is a | |||
streaming SAX-like JSON parser. Ijson provides a standard Python iterator | |||
interface for it. | |||
|
|||
Usage | |||
===== | |||
|
|||
Basic usage:: | |||
|
|||
from ijson import parse | |||
|
|||
f = urlopen('http://.../') # some huge JSON | |||
parser = parse(f) | |||
while True: | |||
event, value = parser.next() | |||
if event == 'start_map': | |||
while event != 'end_map': | |||
event, value = parser.next() | |||
if event == 'map_key' and value == 'title': | |||
event, value = parser.next() | |||
do_something_with(value) | |||
|
|||
Acknowledgements | |||
================ | |||
|
|||
Ijson was inspired by the `yajl-py <http://pykler.github.com/yajl-py/>`_ wrapper by | |||
Hatem Nassrat. Though ijson borrows almost nothing from the actual yajl-py code, | |||
that code served as an example of integrating with yajl using ctypes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1 @@ | |||
from ijson.parse import parse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,6 @@ | |||
from ctypes import cdll, util | |||
|
|||
# Locate and load the YAJL shared library once, at import time. The loaded
# library object is exposed as ``yajl`` for the rest of the package.
name = util.find_library('yajl')
if name is None:
    # ImportError (still an Exception subclass) tells callers that a
    # required dependency is missing rather than signalling a generic failure.
    raise ImportError('YAJL shared object not found.')
yajl = cdll.LoadLibrary(name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,89 @@ | |||
from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, \ | |||
c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at | |||
from decimal import Decimal | |||
|
|||
from ijson.lib import yajl | |||
|
|||
def _callback_type(*value_types):
    """Build the ctypes prototype of a parser callback.

    Every callback returns a C int and receives an opaque context pointer
    as its first argument, followed by the given value argument types.
    """
    return CFUNCTYPE(c_int, c_void_p, *value_types)


C_EMPTY = _callback_type()                          # no value argument
C_INT = _callback_type(c_int)                       # one C int
C_LONG = _callback_type(c_long)                     # one C long
C_DOUBLE = _callback_type(c_double)                 # one C double
C_STR = _callback_type(POINTER(c_ubyte), c_uint)    # byte pointer + length
|
|||
def number(value):
    """Convert the textual form of a JSON number to a Python number.

    Returns an ``int`` when the text is a plain integer literal and a
    ``Decimal`` otherwise (fractions, exponents), so no precision is lost.

    *value* may be text or bytes. Bytes are decoded first because
    ``Decimal`` does not accept bytes objects on Python 3.
    """
    if isinstance(value, bytes):
        # JSON number syntax is pure ASCII.
        value = value.decode('ascii')
    try:
        return int(value)
    except ValueError:
        return Decimal(value)
|
|||
# Mapping of JSON parser events to callback C types and value converters.
# Used to define the Callbacks structure and actual callback functions
# inside the parse function.
#
# Each converter receives exactly the value arguments of its C prototype
# (the context pointer is stripped by the callback wrapper in parse):
#   C_EMPTY            -> no arguments
#   C_INT/LONG/DOUBLE  -> one already-converted C number
#   C_STR              -> (pointer, length) of a byte buffer
_callback_data = [
    ('null', C_EMPTY, lambda: None),
    ('boolean', C_INT, lambda v: bool(v)),
    # 'integer' and 'double' get a single numeric argument, not a
    # (pointer, length) pair, so they must not go through string_at.
    # NOTE(review): YAJL presumably prefers the 'number' callback whenever it
    # is set, which would leave these two unused -- confirm against YAJL docs.
    ('integer', C_LONG, lambda v: int(v)),
    ('double', C_DOUBLE, lambda v: float(v)),
    ('number', C_STR, lambda v, l: number(string_at(v, l))),
    ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')),
    ('start_map', C_EMPTY, lambda: None),
    # NOTE(review): unlike 'string', map keys are returned as raw bytes
    # (not decoded from UTF-8); kept as-is for backward compatibility.
    ('map_key', C_STR, lambda v, l: string_at(v, l)),
    ('end_map', C_EMPTY, lambda: None),
    ('start_array', C_EMPTY, lambda: None),
    ('end_array', C_EMPTY, lambda: None),
]
|
|||
class Callbacks(Structure):
    """ctypes structure holding one function pointer per parser event.

    The fields are generated from ``_callback_data`` (event name and C
    callback type, in table order). An instance is passed by reference to
    ``yajl_alloc`` in ``parse``.
    """
    # Loop variables deliberately avoid shadowing the builtin ``type``.
    _fields_ = [(event, c_type) for event, c_type, _convert in _callback_data]
|
|||
class Config(Structure):
    """ctypes structure with the parser options handed to ``yajl_alloc``.

    Both fields are unsigned ints; ``parse`` fills them from its
    ``allow_comments`` and ``check_utf8`` arguments, in that order.
    """
    _fields_ = [
        ('allowComments', c_uint),
        ('checkUTF8', c_uint),
    ]
|
|||
# Status codes returned by yajl_parse / yajl_parse_complete, numbered 0..3.
(
    YAJL_OK,
    YAJL_CANCELLED,
    YAJL_INSUFFICIENT_DATA,
    YAJL_ERROR,
) = range(4)
|
|||
class JSONError(Exception):
    """Base class for the errors raised by ``parse``."""
|
|||
class ParseCancelledError(JSONError):
    """Raised by ``parse`` when the parser returns YAJL_CANCELLED."""

    def __init__(self):
        # The message is fixed; the exception takes no arguments.
        message = 'Parsing cancelled by a callback'
        super(ParseCancelledError, self).__init__(message)
|
|||
def parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024):
    """Iterate over the JSON parsing events of the file-like object *f*.

    The input is read in chunks of *buf_size* and fed to YAJL. Every parser
    callback produces one ``(event, value)`` pair, where *event* is one of
    the names listed in ``_callback_data`` and *value* is the result of the
    matching converter (``None`` for events that carry no value).

    Parameters:
        f: object with a ``read(size)`` method returning the JSON data
            (presumably bytes -- the chunks are handed to the C library as-is).
        allow_comments: stored in the ``allowComments`` field of ``Config``.
        check_utf8: stored in the ``checkUTF8`` field of ``Config``.
        buf_size: number of bytes requested from ``f.read`` per chunk.

    Raises:
        ParseCancelledError: the parser returned ``YAJL_CANCELLED``.
        JSONError: the parser returned ``YAJL_ERROR``; the exception argument
            is the message obtained from ``yajl_get_error``.

    NOTE(review): input that ends prematurely (``YAJL_INSUFFICIENT_DATA`` at
    end of file) is not reported and simply ends the iteration, as before.
    """
    events = []

    def callback(event, func_type, func):
        # Wrap a value converter into a C callback that records the event.
        def c_callback(context, *args):
            events.append((event, func(*args)))
            return 1  # keep parsing (the callbacks never cancel)
        return func_type(c_callback)

    # Both functions return pointers. Without an explicit restype ctypes
    # assumes a C int and truncates the address on 64-bit platforms.
    yajl.yajl_alloc.restype = c_void_p
    yajl.yajl_get_error.restype = c_void_p

    # ``callbacks`` and ``config`` stay referenced by this generator's frame
    # for as long as the parser handle is in use.
    callbacks = Callbacks(*[callback(*data) for data in _callback_data])
    config = Config(allow_comments, check_utf8)
    # Wrapped in c_void_p so the handle is passed back as a full-width pointer.
    handle = c_void_p(
        yajl.yajl_alloc(byref(callbacks), byref(config), None, None))
    try:
        while True:
            buffer = f.read(buf_size)
            if buffer:
                result = yajl.yajl_parse(handle, buffer, len(buffer))
            else:
                # End of input: let the parser finish any pending token.
                result = yajl.yajl_parse_complete(handle)

            if result == YAJL_CANCELLED:
                raise ParseCancelledError()
            if result == YAJL_ERROR:
                perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer))
                if perror:
                    # Copy the message, then release the C string that
                    # yajl_get_error allocated.
                    error = string_at(perror)
                    yajl.yajl_free_error(handle, c_void_p(perror))
                else:
                    error = None
                raise JSONError(error)

            # Flush before checking for end of input so that events emitted
            # by yajl_parse_complete (e.g. a trailing top-level number) are
            # not lost.
            for event, value in events:
                yield event, value
            del events[:]

            if not buffer:
                break
    finally:
        yajl.yajl_free(handle)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,14 @@ | |||
# -*- coding:utf-8 -*- | |||
from distutils.core import setup | |||
|
|||
# Read the long description up front so the README file handle is closed
# deterministically instead of being left to the garbage collector.
with open('README.txt') as readme:
    long_description = readme.read()

setup(
    name = 'ijson',
    version = '0.1.0',
    author = 'Ivan Sagalaev',
    author_email = 'Maniac@SoftwareManiacs.Org',
    packages = ['ijson'],
    url = 'https://launchpad.net/ijson',
    license = 'LICENSE.txt',
    description = 'A Python wrapper to YAJL providing standard iterator interface to streaming JSON parsing',
    long_description = long_description,
)