Skip to content

Commit

Permalink
Rework iterating over byte sequences
Browse files Browse the repository at this point in the history
This removes std::io::StringReader and std::io::ByteArrayReader, in
favour of String and ByteArray providing a dedicated Bytes iterator that
also implements Read. The Bytes iterator implements the new
std::iter::Bytes trait. This trait is a regular iterator, but also
provides the "next_byte" method to pull bytes out of the iterator
without wrapping them in an Option type. This makes the Bytes iterator
more efficient when dealing with large input streams.

Changelog: changed
  • Loading branch information
yorickpeterse committed Oct 5, 2022
1 parent 9d0884c commit 6620c44
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 147 deletions.
59 changes: 55 additions & 4 deletions libstd/src/std/byte_array.inko
Expand Up @@ -5,7 +5,8 @@ import std::drop::Drop
import std::fmt::(Format, Formatter)
import std::hash::(Hash, Hasher)
import std::index::(bounds_check, Index, SetIndex)
import std::iter::(Enum, Iter)
import std::io::Read
import std::iter::(Bytes as BytesTrait, EOF, Enum, Iter)
import std::option::Option
import std::string::(IntoString, ToString)

Expand Down Expand Up @@ -263,9 +264,9 @@ class builtin ByteArray {
iter.to_array
}

# Returns an `Iter` that iterates over all values in `self`.
fn pub iter -> Iter[Int, Never] {
Enum.indexed(length) fn (i) { _INKO.byte_array_get(self, i) }
# Returns an iterator over the bytes in `self`.
fn pub iter -> Bytes {
Bytes { @bytes = self, @index = 0 }
}
}

Expand Down Expand Up @@ -412,3 +413,53 @@ impl Format for ByteArray {
formatter.write(']')
}
}

# An iterator over the bytes in a `ByteArray`.
#
# The iterator holds a reference to the `ByteArray` together with the index of
# the next byte to produce.
#
# This iterator supports mutating the underlying `ByteArray` during iteration,
# though it's recommended not to do so as the results may be confusing.
class pub Bytes {
# The `ByteArray` that is iterated over.
let @bytes: ref ByteArray
# The index of the next byte to return.
let @index: Int
}

impl Iter[Int, Never] for Bytes {
  # Advances the iterator, wrapping the next byte in an `Option`.
  #
  # This delegates to `next_byte`, translating the `EOF` marker into a `None`.
  fn pub mut next -> Option[Int] {
    let byte = next_byte

    if byte == EOF { Option.None } else { Option.Some(byte) }
  }
}

impl BytesTrait[Never] for Bytes {
  # Returns the byte at the current index and moves the index forward by one.
  #
  # Once the index is no longer within the bounds of the `ByteArray`, `EOF` is
  # returned and the index is left untouched.
  fn pub mut next_byte -> Int {
    if @index >= @bytes.length { return EOF }

    # The swap yields the old index while storing the incremented one.
    _INKO.byte_array_get(@bytes, @index := @index + 1)
  }
}

impl Read for Bytes {
  # Appends up to `size` bytes from the iterator to `into`.
  #
  # The return value is the number of bytes that were appended, which is less
  # than `size` if the iterator runs out of bytes first.
  fn pub mut read(into: mut ByteArray, size: Int) -> Int {
    let mut count = 0

    loop {
      # Check the limit first so no byte is consumed once `size` is reached.
      if count >= size { break }

      let byte = next_byte

      if byte == EOF { break }

      into.push(byte)
      count += 1
    }

    count
  }

  # Appends all remaining bytes to `bytes`, returning how many were appended.
  fn pub mut read_all(bytes: mut ByteArray) -> Int {
    let remaining = @bytes.length - @index

    read(into: bytes, size: remaining)
  }
}
78 changes: 0 additions & 78 deletions libstd/src/std/io.inko
Expand Up @@ -262,81 +262,3 @@ trait pub Seek {
# the end.
fn pub mut seek(position: Int) !! Error -> Int
}

# Adapts a `String` so it can be used where a `Read` type is expected.
#
# The reader keeps a reference to the `String` and a byte index into it.
#
# # Error handling
#
# The `read` and `read_all` implementations never throw, so no error handling
# is needed for values typed as `StringReader`.
class pub StringReader {
  let @string: ref String
  let @index: Int

  # Returns a reader that starts at the first byte of `string`.
  fn pub static new(string: ref String) -> Self {
    StringReader { @string = string, @index = 0 }
  }
}

impl Read for StringReader {
  # Appends up to `size` bytes of the `String` to `into`, returning the number
  # of bytes appended.
  fn pub mut read(into: mut ByteArray, size: Int) -> Int {
    let limit = @string.size
    let mut count = 0

    loop {
      if count >= size { break }
      if @index >= limit { break }

      # The swap yields the old index while storing the incremented one.
      into.push(@string.byte(@index := @index + 1))
      count += 1
    }

    count
  }

  # Appends all remaining bytes to `bytes`, returning how many were appended.
  fn pub mut read_all(bytes: mut ByteArray) -> Int {
    let remaining = @string.size - @index

    read(into: bytes, size: remaining)
  }
}

# Adapts a `ByteArray` so it can be used where a `Read` type is expected.
#
# The reader only holds an immutable reference to the `ByteArray`, which is
# itself mutable, so the `ByteArray` may change while the reader exists. This is
# supported, but mutating the `ByteArray` while reading from it is not
# recommended as the results may be confusing.
#
# # Error handling
#
# The `read` and `read_all` implementations never throw, so no error handling
# is needed for values typed as `ByteArrayReader`.
class pub ByteArrayReader {
  let @bytes: ref ByteArray
  let @index: Int

  # Returns a reader that starts at the first byte of `bytes`.
  fn pub static new(bytes: ref ByteArray) -> Self {
    ByteArrayReader { @bytes = bytes, @index = 0 }
  }
}

impl Read for ByteArrayReader {
  # Appends up to `size` bytes of the `ByteArray` to `into`, returning the
  # number of bytes appended.
  fn pub mut read(into: mut ByteArray, size: Int) -> Int {
    # The length is sampled once, before any byte is copied.
    let limit = @bytes.length
    let mut count = 0

    loop {
      if count >= size { break }
      if @index >= limit { break }

      into.push(@bytes[@index := @index + 1])
      count += 1
    }

    count
  }

  # Appends all remaining bytes to `bytes`, returning how many were appended.
  fn pub mut read_all(bytes: mut ByteArray) -> Int {
    let remaining = @bytes.length - @index

    read(into: bytes, size: remaining)
  }
}
39 changes: 39 additions & 0 deletions libstd/src/std/iter.inko
Expand Up @@ -24,6 +24,9 @@ import std::cmp::Equal
import std::option::Option
import std::string::(ToString, StringBuffer)

# The "byte" that signals the end in a `Bytes` iterator.
#
# `Bytes.next_byte` returns this value instead of a byte once all input is
# consumed.
let pub EOF = -1

# A generic iterator over a sequence of values of type `T`.
#
# The type parameter `T` is the type of values that is produced. The type
Expand Down Expand Up @@ -458,6 +461,42 @@ impl Iter[T, E] for Enum {
}
}

# An iterator over a sequence of bytes.
#
# A `Bytes` is a regular iterator, but introduces the extra method `next_byte`.
# This method is similar to `Iter.next` in that it advances the iterator, but
# instead of returning an `Option[Int]` it returns an `Int`. This allows `Bytes`
# to be used as both a regular iterator and a more specialised (and more
# efficient) iterator over (large) sequences of bytes.
#
# When implementing `Bytes` for a type, you must also implement `Iter`. The
# easiest way of doing this is to have `Iter.next` reuse the implementation of
# `Bytes.next_byte` like so:
#
#     impl Iter[Int, Never] for MyType {
#       fn pub mut next -> Option[Int] {
#         match next_byte {
#           case EOF -> Option.None
#           case byte -> Option.Some(byte)
#         }
#       }
#     }
#
#     impl Bytes[Never] for MyType {
#       fn pub mut next_byte -> Int {
#         # ...
#       }
#     }
#
# The type parameter `E` specifies the error that `next` and `next_byte` may
# throw. If a stream can't throw, this parameter should be assigned to `Never`.
trait pub Bytes[E]: Iter[Int, E] {
# Returns the next byte in the iterator.
#
# If all input is consumed, this method must return `std::iter::EOF`.
fn pub mut next_byte !! E -> Int
}

# Joins the values of an iterator together using a separator.
#
# # Examples
Expand Down
54 changes: 51 additions & 3 deletions libstd/src/std/string.inko
Expand Up @@ -13,7 +13,8 @@ import std::fmt::(Format, Formatter)
import std::fs::path::(IntoPath, Path, ToPath)
import std::hash::(Hash, Hasher)
import std::index::(bounds_check)
import std::iter::(Enum, Iter)
import std::io::Read
import std::iter::(Bytes as BytesTrait, EOF, Enum, Iter)
import std::ops::Add

let TAB_BYTE = 9
Expand Down Expand Up @@ -236,8 +237,8 @@ class builtin String {
}

# Returns an iterator over the bytes in `self`.
fn pub bytes -> Enum[Int, Never] {
Enum.indexed(size) fn (index) { _INKO.string_byte(self, index) }
fn pub bytes -> Bytes {
Bytes { @string = self, @index = 0 }
}

# Splits `self` into an iterator of `Strings`, each separated by the given
Expand Down Expand Up @@ -582,6 +583,53 @@ impl Drop for Characters {
}
}

# An iterator over the bytes in a `String`.
#
# The iterator holds a reference to the `String` together with the index of the
# next byte to produce.
class pub Bytes {
# The `String` whose bytes are iterated over.
let @string: ref String
# The index of the next byte to return.
let @index: Int
}

impl Iter[Int, Never] for Bytes {
  # Advances the iterator, wrapping the next byte in an `Option`.
  #
  # This delegates to `next_byte`, translating the `EOF` marker into a `None`.
  fn pub mut next -> Option[Int] {
    let byte = next_byte

    if byte == EOF { Option.None } else { Option.Some(byte) }
  }
}

impl BytesTrait[Never] for Bytes {
  # Returns the byte at the current index and moves the index forward by one.
  #
  # Once the index reaches the size of the `String`, `EOF` is returned and the
  # index is left untouched.
  fn pub mut next_byte -> Int {
    if @index >= @string.size { return EOF }

    # The swap yields the old index while storing the incremented one.
    _INKO.string_byte(@string, @index := @index + 1)
  }
}

impl Read for Bytes {
  # Appends up to `size` bytes from the iterator to `into`.
  #
  # The return value is the number of bytes that were appended, which is less
  # than `size` if the iterator runs out of bytes first.
  fn pub mut read(into: mut ByteArray, size: Int) -> Int {
    let mut count = 0

    while count < size {
      let byte = next_byte

      if byte == EOF { break }

      into.push(byte)
      count += 1
    }

    count
  }

  # Appends all remaining bytes to `bytes`, returning how many were appended.
  fn pub mut read_all(bytes: mut ByteArray) -> Int {
    let remaining = @string.size - @index

    read(into: bytes, size: remaining)
  }
}

# A buffer for efficiently concatenating `String` objects together.
#
# When concatenating multiple `String` objects together, intermediate `String`
Expand Down
61 changes: 61 additions & 0 deletions libstd/test/std/test_byte_array.inko
@@ -1,4 +1,5 @@
import helpers::(fmt, hash)
import std::iter::EOF
import std::test::Tests

fn pub tests(t: mut Tests) {
Expand Down Expand Up @@ -165,4 +166,64 @@ fn pub tests(t: mut Tests) {
t.equal(fmt(ByteArray.from_array([10])), '[10]')
t.equal(fmt(ByteArray.from_array([10, 20])), '[10, 20]')
}

t.test('Bytes.next') fn (t) {
  # 'abc' is the byte sequence 97, 98, 99.
  let source = 'abc'.to_byte_array
  let bytes = source.iter

  t.equal(bytes.next, Option.Some(97))
  t.equal(bytes.next, Option.Some(98))
  t.equal(bytes.next, Option.Some(99))
  t.equal(bytes.next, Option.None)
}

t.test('Bytes.next_byte') fn (t) {
  # 'abc' is the byte sequence 97, 98, 99; after that only EOF is produced.
  let source = 'abc'.to_byte_array
  let bytes = source.iter

  t.equal(bytes.next_byte, 97)
  t.equal(bytes.next_byte, 98)
  t.equal(bytes.next_byte, 99)
  t.equal(bytes.next_byte, EOF)
}

t.test('Bytes.read') fn (t) {
  let source = 'foo'.to_byte_array
  let bytes = source.iter
  let output = ByteArray.new

  # Each read appends to the same buffer; the count shrinks as input runs out.
  t.equal(bytes.read(into: output, size: 2), 2)
  t.equal(output.to_string, 'fo')
  t.equal(bytes.read(into: output, size: 2), 1)
  t.equal(output.to_string, 'foo')
  t.equal(bytes.read(into: output, size: 2), 0)
  t.equal(output.to_string, 'foo')
}

t.test('Bytes.read with a mutated ByteArray') fn (t) {
  let source = 'foo'.to_byte_array
  let bytes = source.iter
  let output = ByteArray.new

  t.equal(bytes.read(into: output, size: 1), 1)
  t.equal(output.to_string, 'f')

  # Shrinking the source leaves a single unread byte.
  source.pop
  t.equal(bytes.read(into: output, size: 2), 1)
  t.equal(output.to_string, 'fo')

  # Growing the source again makes one more byte available (111 is 'o').
  source.push(111)
  t.equal(bytes.read(into: output, size: 2), 1)
  t.equal(output.to_string, 'foo')
}

t.test('Bytes.read_all') fn (t) {
  let source = 'foo'.to_byte_array
  let bytes = source.iter
  let output = ByteArray.new

  # The second call finds nothing left to read.
  t.equal(bytes.read_all(output), 3)
  t.equal(bytes.read_all(output), 0)
  t.equal(output.to_string, 'foo')
}
}

0 comments on commit 6620c44

Please sign in to comment.