Skip to content

Commit

Permalink
Use ByteArray as input for json.Parser
Browse files Browse the repository at this point in the history
The JSON parser now takes a `ref ByteArray` as its input, instead of a
`String`. This makes parsing files and the like a bit more efficient,
as you don't need to first convert the bytes into a `String` just so you
can parse it.

As part of this, json.parse is moved to Json.parse_string, and
Json.parse_bytes is added to parse a `ByteArray`. Finally, the runtime
functions for parsing strings are changed to simply take a pointer and a
size, instead of a string and a range. This removes the need for an
intermediate `String`.

This fixes #357.

Changelog: changed
  • Loading branch information
yorickpeterse committed Jul 21, 2023
1 parent 706e3b9 commit c78555e
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 73 deletions.
30 changes: 10 additions & 20 deletions rt/src/runtime/string.rs
Expand Up @@ -7,6 +7,7 @@ use std::cmp::min;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::slice;
use std::str;
use unicode_segmentation::{Graphemes, UnicodeSegmentation};

#[no_mangle]
Expand Down Expand Up @@ -77,16 +78,11 @@ pub unsafe extern "system" fn inko_string_to_byte_array(
#[no_mangle]
pub unsafe extern "system" fn inko_string_to_float(
state: *const State,
string: *const InkoString,
start: i64,
end: i64,
bytes: *mut u8,
size: i64,
) -> InkoResult {
let string = InkoString::read(string);
let slice = if start >= 0 && end >= 0 {
&string[start as usize..end as usize]
} else {
string
};
let slice =
str::from_utf8_unchecked(slice::from_raw_parts(bytes, size as _));

let parsed = match slice {
"Infinity" => Ok(f64::INFINITY),
Expand All @@ -103,27 +99,21 @@ pub unsafe extern "system" fn inko_string_to_float(
pub unsafe extern "system" fn inko_string_to_int(
state: *const State,
process: ProcessPointer,
string: *const InkoString,
bytes: *mut u8,
size: i64,
radix: i64,
start: i64,
end: i64,
) -> InkoResult {
let string = InkoString::read(string);

if !(2..=36).contains(&radix) {
panic(process, &format!("The radix '{}' is invalid", radix));
}

let slice = if start >= 0 && end >= 0 {
&string[start as usize..end as usize]
} else {
string
};
let slice =
str::from_utf8_unchecked(slice::from_raw_parts(bytes, size as _));

// Rust doesn't handle parsing strings like "-0x4a3f043013b2c4d1" out of the
// box.
let parsed = if radix == 16 {
if let Some(tail) = string.strip_prefix("-0x") {
if let Some(tail) = slice.strip_prefix("-0x") {
i64::from_str_radix(tail, 16).map(|v| 0_i64.wrapping_sub(v))
} else {
i64::from_str_radix(slice, 16)
Expand Down
7 changes: 3 additions & 4 deletions std/src/std/float.inko
Expand Up @@ -15,9 +15,8 @@ class extern AnyResult {
fn extern inko_float_to_string(state: Pointer[Int8], float: Float64) -> String
fn extern inko_string_to_float(
state: Pointer[Int8],
string: String,
start: Int,
end: Int,
bytes: Pointer[Int8],
size: Int,
) -> AnyResult

# A type that can be converted to a Float.
Expand Down Expand Up @@ -74,7 +73,7 @@ class builtin Float {
#
# Float.parse('1.2e1') # => Option.Some(12.0)
fn pub static parse(string: String) -> Option[Float] {
match inko_string_to_float(_INKO.state, string, -1, -1) {
match inko_string_to_float(_INKO.state, string.to_pointer, string.size) {
case { @tag = 0, @value = v } -> Option.Some(v as Float)
case _ -> Option.None
}
Expand Down
17 changes: 11 additions & 6 deletions std/src/std/int.inko
Expand Up @@ -31,10 +31,9 @@ class extern IntResult {
fn extern inko_string_to_int(
state: Pointer[Int8],
process: Pointer[Int8],
string: String,
bytes: Pointer[Int8],
size: Int,
radix: Int,
start: Int,
end: Int,
) -> IntResult

fn extern inko_int_pow(process: Pointer[Int8], left: Int, right: Int) -> Int64
Expand All @@ -59,7 +58,9 @@ class builtin Int {
# Int.from_base2('11') # => Option.Some(3)
# Int.from_base2('ff') # => Option.None
fn pub static from_base2(string: String) -> Option[Int] {
match inko_string_to_int(_INKO.state, _INKO.process, string, 2, -1, -1) {
match inko_string_to_int(
_INKO.state, _INKO.process, string.to_pointer, string.size, 2
) {
case { @tag = 0, @value = v } -> Option.Some(v)
case _ -> Option.None
}
Expand All @@ -75,7 +76,9 @@ class builtin Int {
# Int.from_base10('12') # => Option.Some(12)
# Int.from_base10('ff') # => Option.None
fn pub static from_base10(string: String) -> Option[Int] {
match inko_string_to_int(_INKO.state, _INKO.process, string, 10, -1, -1) {
match inko_string_to_int(
_INKO.state, _INKO.process, string.to_pointer, string.size, 10
) {
case { @tag = 0, @value = v } -> Option.Some(v)
case _ -> Option.None
}
Expand All @@ -95,7 +98,9 @@ class builtin Int {
# Int.from_base16('ef') # => Option.Some(239)
# Int.from_base16('zz') # => Option.None
fn pub static from_base16(string: String) -> Option[Int] {
match inko_string_to_int(_INKO.state, _INKO.process, string, 16, -1, -1) {
match inko_string_to_int(
_INKO.state, _INKO.process, string.to_pointer, string.size, 16
) {
case { @tag = 0, @value = v } -> Option.Some(v)
case _ -> Option.None
}
Expand Down
85 changes: 50 additions & 35 deletions std/src/std/json.inko
Expand Up @@ -76,17 +76,15 @@ class extern AnyResult {
fn extern inko_string_to_int(
state: Pointer[Int8],
process: Pointer[Int8],
string: String,
bytes: Pointer[Int8],
size: Int,
radix: Int,
start: Int,
end: Int,
) -> IntResult

fn extern inko_string_to_float(
state: Pointer[Int8],
string: String,
start: Int,
end: Int,
bytes: Pointer[Int8],
size: Int,
) -> AnyResult

let EOF = -1
Expand Down Expand Up @@ -215,6 +213,30 @@ class pub enum Json {
case Bool(Bool)
case Null

# Parses a JSON `String` into a `Json` value.
#
# The `String` is first converted to a `ByteArray`, which is then handed to a
# `Parser`. If you already have a `ByteArray`, use `Json.parse_bytes` instead
# to avoid this conversion.
#
# # Examples
#
# import std.json.Json
#
# Json.parse_string('[10]').unwrap # => Json.Array([Json.Int(10)])
fn pub static parse_string(string: String) -> Result[Json, Error] {
# `Parser.new` takes a `ref ByteArray`, so the bytes are bound to a local
# before being passed to the parser.
let bytes = string.to_byte_array

Parser.new(bytes).parse
}

# Parses a `ByteArray` into a `Json` value.
#
# The bytes are passed to a `Parser` as-is, without first being converted to a
# `String`.
#
# # Examples
#
# import std.json.Json
#
# Json.parse_bytes('[10]'.to_byte_array) # => Result.Ok(Json.Array([Json.Int(10)]))
fn pub static parse_bytes(bytes: ref ByteArray) -> Result[Json, Error] {
Parser.new(bytes).parse
}

# Formats `self` as a JSON string using indentation for nested objects.
#
# This method uses two spaces per indentation. To customise the amount of
Expand Down Expand Up @@ -319,7 +341,7 @@ impl Equal[Json] for Json {

# A type for parsing a stream of bytes into a JSON object.
#
# This parser only supports parsing `String` values as input. If you need to
# This parser only supports parsing `ByteArray` values as input. If you need to
# parse very large documents, it's best to separate the objects on a per line
# basis, then parse the document one line at a time.
#
Expand All @@ -336,7 +358,7 @@ impl Equal[Json] for Json {
# 10 MiB _per string_. You can change this limit by adjusting the value of the
# `max_string_size` field.
class pub Parser {
let @string: String
let @input: ref ByteArray
let @index: Int
let @size: Int
let @line: Int
Expand All @@ -352,12 +374,12 @@ class pub Parser {
# When parsing a string that exceeds this limit, an error is thrown.
let pub @max_string_size: Int

# Returns a new parser that will parse the given `String`.
fn pub static new(string: String) -> Parser {
# Returns a new parser that will parse the given `ByteArray`.
fn pub static new(input: ref ByteArray) -> Parser {
Parser {
@string = string,
@input = input,
@index = 0,
@size = string.size,
@size = input.size,
@line = 1,
@depth = 0,
@max_depth = 100,
Expand Down Expand Up @@ -451,8 +473,7 @@ class pub Parser {
}

fn mut string_with_escape_sequence(started_at: Int) -> Result[String, Error] {
let buffer =
@string.slice_bytes(started_at, @index - started_at).to_byte_array
let buffer = @input.slice(started_at, size: @index - started_at)

loop {
match current {
Expand Down Expand Up @@ -532,9 +553,10 @@ class pub Parser {
throw error("Expected four hexadecimal digits, but we ran out of input")
}

match inko_string_to_int(
_INKO.state, _INKO.process, @string, 16, start, @index
) {
let ptr = @input.to_pointer as Int + start as Pointer[Int8]
let size = @index - start

match inko_string_to_int(_INKO.state, _INKO.process, ptr, size, 16) {
case { @tag = 0, @value = v } -> Result.Ok(v)
case _ -> Result.Error(
error("'{slice_string(start)}' is an invalid Unicode codepoint")
Expand Down Expand Up @@ -635,9 +657,10 @@ class pub Parser {
# number parser. As part of parsing the JSON number we already validate
# it. This means we can bypass `Int.from_base10` (and `Float.parse`
# below), and instead use the underlying runtime functions.
match inko_string_to_int(
_INKO.state, _INKO.process, @string, 10, start, @index
) {
let ptr = @input.to_pointer as Int + start as Pointer[Int8]
let size = @index - start

match inko_string_to_int(_INKO.state, _INKO.process, ptr, size, 10) {
# If the number is too big to fit in an integer, we'll promote the
# number to a float.
case { @tag = 0, @value = v } -> return Result.Ok(Json.Int(v))
Expand All @@ -649,8 +672,11 @@ class pub Parser {
# At this point we've already validated the input format, and it's
# compatible with the underlying float parser, so no extra checks are
# needed.
let ptr = @input.to_pointer as Int + start as Pointer[Int8]
let size = @index - start

Result.Ok(Json.Float(
inko_string_to_float(_INKO.state, @string, start, @index).value as Float
inko_string_to_float(_INKO.state, ptr, size).value as Float
))
}

Expand All @@ -675,7 +701,7 @@ class pub Parser {

fn current -> Int {
if @index < @size {
@string.byte(@index)
@input.get(@index)
} else {
EOF
}
Expand All @@ -684,7 +710,7 @@ class pub Parser {
fn peek -> Int {
let index = @index + 1

if index < @size { @string.byte(index) } else { EOF }
if index < @size { @input.get(index) } else { EOF }
}

fn mut identifier(name: String) -> Result[Nil, Error] {
Expand Down Expand Up @@ -741,7 +767,7 @@ class pub Parser {
}

fn slice_string(start: Int) -> String {
@string.slice_bytes(start, size: @index - start)
@input.slice(start, size: @index - start).into_string
}

fn error(message: String) -> Error {
Expand Down Expand Up @@ -857,14 +883,3 @@ class pub Generator {
@buffer.push(if @pretty { ",\n" } else { ', ' })
}
}

# Parses a JSON string into a `Json` value.
#
# # Examples
#
# import std.json
#
# json.parse('[10]').unwrap # => Json.Array([Json.Int(10)])
fn pub parse(string: String) -> Result[Json, Error] {
Parser.new(string).parse
}
29 changes: 21 additions & 8 deletions std/test/std/test_json.inko
@@ -1,13 +1,17 @@
import helpers.(fmt)
import std.json.(self, Error, Json, Parser)
import std.json.(Error, Json, Parser)
import std.test.Tests

fn parse(input: String) -> Result[Json, Error] {
Parser.new(input).parse
let bytes = input.to_byte_array

Parser.new(bytes).parse
}

fn parse_invalid(input: String) -> Option[String] {
Parser.new(input).parse.error.map fn (v) { v.to_string }
let bytes = input.to_byte_array

Parser.new(bytes).parse.error.map fn (v) { v.to_string }
}

fn pub tests(t: mut Tests) {
Expand Down Expand Up @@ -261,7 +265,8 @@ fn pub tests(t: mut Tests) {
t.true(parse('[],').error?)

{
let parser = Parser.new('[[[[10]]]]')
let bytes = '[[[[10]]]]'.to_byte_array
let parser = Parser.new(bytes)

parser.max_depth = 2
t.true(parser.parse.error?)
Expand Down Expand Up @@ -328,7 +333,8 @@ fn pub tests(t: mut Tests) {
t.true(parse('"\uDFFF\uDFFF"').error?)

{
let parser = Parser.new('"foo"')
let bytes = '"foo"'.to_byte_array
let parser = Parser.new(bytes)

parser.max_string_size = 2
t.true(parser.parse.error?)
Expand Down Expand Up @@ -380,7 +386,8 @@ fn pub tests(t: mut Tests) {
t.true(parse('{"a": true} "x"').error?)

{
let parser = Parser.new('{"a": {"b": {"c": 10}}}')
let bytes = '{"a": {"b": {"c": 10}}}'.to_byte_array
let parser = Parser.new(bytes)

parser.max_depth = 2
t.true(parser.parse.error?)
Expand All @@ -398,7 +405,13 @@ fn pub tests(t: mut Tests) {
t.true(parse("\u{EF}\u{BB}\u{BF}10").error?)
}

t.test('json.parse') fn (t) {
t.equal(json.parse('[10]'), Result.Ok(Json.Array([Json.Int(10)])))
t.test('Json.parse_string') fn (t) {
t.equal(Json.parse_string('[10]'), Result.Ok(Json.Array([Json.Int(10)])))
}

t.test('Json.parse_bytes') fn (t) {
let bytes = '[10]'.to_byte_array

t.equal(Json.parse_bytes(bytes), Result.Ok(Json.Array([Json.Int(10)])))
}
}

0 comments on commit c78555e

Please sign in to comment.