From b408762084b32a2a9015c81a1d5d07e5e40a2a26 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 21 Oct 2022 23:09:18 +0200 Subject: [PATCH 01/17] Working --- src/JsonDecoder/StringOnlyDecoder.php | 27 +++++++++++++ src/Parser.php | 38 +++++++++++++++++-- src/Tokens.php | 15 ++++++++ .../JsonDecoder/StringOnlyDecoderTest.php | 32 ++++++++++++++++ test/JsonMachineTest/ParserTest.php | 17 +++++++++ 5 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 src/JsonDecoder/StringOnlyDecoder.php create mode 100644 test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php diff --git a/src/JsonDecoder/StringOnlyDecoder.php b/src/JsonDecoder/StringOnlyDecoder.php new file mode 100644 index 0000000..1702008 --- /dev/null +++ b/src/JsonDecoder/StringOnlyDecoder.php @@ -0,0 +1,27 @@ +innerDecoder = $innerDecoder; + } + + public function decode($jsonValue) + { + if (is_string($jsonValue)) { + return $this->innerDecoder->decode($jsonValue); + } + + return new ValidResult($jsonValue); + } +} diff --git a/src/Parser.php b/src/Parser.php index 8a18265..30d7ac2 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,6 +4,7 @@ namespace JsonMachine; +use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; @@ -11,6 +12,7 @@ use JsonMachine\Exception\UnexpectedEndSyntaxErrorException; use JsonMachine\JsonDecoder\ExtJsonDecoder; use JsonMachine\JsonDecoder\ItemDecoder; +use JsonMachine\JsonDecoder\StringOnlyDecoder; use Traversable; class Parser implements \IteratorAggregate, PositionAware @@ -52,21 +54,28 @@ class Parser implements \IteratorAggregate, PositionAware /** @var bool */ private $hasSingleJsonPointer; + /** @var bool */ + private bool $recursive; + /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 * @param ItemDecoder $jsonDecoder * * @throws InvalidArgumentException */ - public function 
__construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonDecoder = null) + public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonDecoder = null, $recursive = false) { $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); + if ($recursive) { + $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); + } $this->hasSingleJsonPointer = (count($jsonPointers) === 1); $this->jsonPointers = array_combine($jsonPointers, $jsonPointers); $this->paths = $this->buildPaths($this->jsonPointers); + $this->recursive = $recursive; } private function buildPaths(array $jsonPointers): array @@ -141,7 +150,12 @@ public function getIterator() ) ) ) { - $jsonBuffer .= $token; + if ($this->recursive && ($token == '{' || $token == '[')) { + $jsonBuffer = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $token = ' '; + } else { + $jsonBuffer .= $token; + } } // todo move this switch to the top just after the syntax check to be a correct FSM switch ($token[0]) { @@ -212,7 +226,7 @@ public function getIterator() $expectedType = 96; // 96 = self::AFTER_ARRAY_VALUE; } } - if ($currentLevel > $iteratorLevel) { + if ($currentLevel > $iteratorLevel && ! $this->recursive) { continue; // a valid json chunk is not completed yet } if ($jsonBuffer !== '') { @@ -244,6 +258,9 @@ public function getIterator() $subtreeEnded = true; break; } + if ($currentLevel < 0) { + break; + } } if ($token === null) { @@ -262,6 +279,19 @@ public function getIterator() $this->currentPath = null; } + /** + * @return void + */ + private function remainingTokens() + { + /** @var \Iterator $iterator */ + $iterator = $this->tokens instanceOf IteratorAggregate ? 
$this->tokens->getIterator() : $this->tokens; + while ($iterator->valid()) { + yield $iterator->current(); + $iterator->next(); + } + } + private function tokenTypes() { $allBytes = []; @@ -346,7 +376,7 @@ public function getMatchedJsonPointer(): string */ private function error($msg, $token, $exception = SyntaxErrorException::class) { - throw new $exception($msg." '".$token."'", $this->tokens->getPosition()); + throw new $exception($msg." '".$token."'", method_exists($this->tokens, 'getPosition') ? $this->tokens->getPosition() : ''); } /** diff --git a/src/Tokens.php b/src/Tokens.php index 2ba96c8..8ac8a31 100644 --- a/src/Tokens.php +++ b/src/Tokens.php @@ -11,6 +11,9 @@ class Tokens implements \IteratorAggregate, PositionAware /** @var iterable */ private $jsonChunks; + /** @var Generator */ + private $generator; + /** * @param iterable $jsonChunks */ @@ -24,6 +27,18 @@ public function __construct($jsonChunks) */ #[\ReturnTypeWillChange] public function getIterator() + { + if ( ! $this->generator) { + $this->generator = $this->innerGenerator(); + } + + return $this->generator; + } + + /** + * @return Generator + */ + public function innerGenerator() { $insignificantBytes = $this->insignificantBytes(); $tokenBoundaries = $this->tokenBoundaries(); diff --git a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php new file mode 100644 index 0000000..35946de --- /dev/null +++ b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php @@ -0,0 +1,32 @@ +assertSame('value', $decoder->decode('"value"')->getValue()); + } + + public function testDoesNotPassParserIntoInnerDecoder() + { + $innerDecoder = new ExtJsonDecoder(); + $decoder = new StringOnlyDecoder($innerDecoder); + $parser = new Parser(new \ArrayObject(['[]'])); + + $this->assertSame($parser, $decoder->decode($parser)->getValue()); + } +} diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 
6065450..b3ba233 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -4,6 +4,7 @@ namespace JsonMachineTest; +use Generator; use JsonMachine\Exception\JsonMachineException; use JsonMachine\Exception\PathNotFoundException; use JsonMachine\Exception\SyntaxErrorException; @@ -526,4 +527,20 @@ public function testThrowsMeaningfulErrorOnIncorrectTokens() foreach ($parser as $index => $item) { } } + + public function testRecursiveIteration() + { + $parser = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); + + foreach ($parser as $object) { + $this->assertInstanceOf(Parser::class, $object); + foreach ($object as $key => $values) { + $this->assertInstanceOf(Parser::class, $values); + $this->assertSame("numbers", $key); + foreach ($values as $fortyTwo) { + $this->assertSame(42, $fortyTwo); + } + } + } + } } From 1466ddbf020c32f81e17caabcc52dbd6592c63c6 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 11:48:27 +0200 Subject: [PATCH 02/17] \JsonMachineTest\ParserTest::testZigZagRecursiveIteration --- test/JsonMachineTest/ParserTest.php | 33 +++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index b3ba233..38ead32 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -14,6 +14,7 @@ use JsonMachine\StringChunks; use JsonMachine\Tokens; use JsonMachine\TokensWithDebugging; +use Traversable; /** * @covers \JsonMachine\Parser @@ -530,12 +531,12 @@ public function testThrowsMeaningfulErrorOnIncorrectTokens() public function testRecursiveIteration() { - $parser = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); + $array = new Parser(new Tokens(['[{"numbers": [42]}]']), '', null, true); - foreach ($parser as $object) { - $this->assertInstanceOf(Parser::class, $object); + foreach ($array as $object) { + 
$this->assertInstanceOf(Traversable::class, $object); foreach ($object as $key => $values) { - $this->assertInstanceOf(Parser::class, $values); + $this->assertInstanceOf(Traversable::class, $values); $this->assertSame("numbers", $key); foreach ($values as $fortyTwo) { $this->assertSame(42, $fortyTwo); @@ -543,4 +544,28 @@ public function testRecursiveIteration() } } } + + public function testZigZagRecursiveIteration() + { + $objectKeysToVisit = ['numbers', 'string', 'more numbers']; + $objectKeysVisited = []; + $valuesToVisit = [41, 42, 'text', 43]; + $valuesVisited = []; + + $array = new Parser(new Tokens(['[{"numbers": [41, 42], "string": ["text"], "more numbers": [43]}]']), '', null, true); + + foreach ($array as $object) { + $this->assertInstanceOf(Traversable::class, $object); + foreach ($object as $key => $values) { + $objectKeysVisited[] = $key; + $this->assertInstanceOf(Traversable::class, $values); + foreach ($values as $value) { + $valuesVisited[] = $value; + } + } + } + + $this->assertSame($objectKeysToVisit, $objectKeysVisited); + $this->assertSame($valuesToVisit, $valuesVisited); + } } From e21abf9342c597c969cea416f95d9f8b6c8fb419 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 12:17:52 +0200 Subject: [PATCH 03/17] Added 'recursive' option --- src/Items.php | 3 ++- src/ItemsOptions.php | 6 ++++++ test/JsonMachineTest/ItemsOptionsTest.php | 1 + test/JsonMachineTest/ItemsTest.php | 13 +++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/Items.php b/src/Items.php index 1dfb958..6c932ad 100644 --- a/src/Items.php +++ b/src/Items.php @@ -63,7 +63,8 @@ public function __construct($bytesIterator, array $options = []) $this->chunks ), $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder() + $this->jsonDecoder ?: new ExtJsonDecoder(), + $options['recursive'] ); } diff --git a/src/ItemsOptions.php b/src/ItemsOptions.php index 0e528b9..8dc9c0c 100644 --- a/src/ItemsOptions.php +++ b/src/ItemsOptions.php @@ 
-66,12 +66,18 @@ private function opt_debug(bool $debug) return $debug; } + private function opt_recursive(bool $recursive) + { + return $recursive; + } + public static function defaultOptions(): array { return [ 'pointer' => '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, + 'recursive' => false, ]; } } diff --git a/test/JsonMachineTest/ItemsOptionsTest.php b/test/JsonMachineTest/ItemsOptionsTest.php index 3dd1cf4..3a4434e 100644 --- a/test/JsonMachineTest/ItemsOptionsTest.php +++ b/test/JsonMachineTest/ItemsOptionsTest.php @@ -53,6 +53,7 @@ private function defaultOptions() 'pointer' => '', 'decoder' => new ExtJsonDecoder(), 'debug' => false, + 'recursive' => false, ]; } diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index d0d06d7..6ecb0ca 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -6,6 +6,7 @@ use JsonMachine\Items; use JsonMachine\JsonDecoder\PassThruDecoder; +use Traversable; /** * @covers \JsonMachine\Items @@ -139,4 +140,16 @@ public function testGetJsonPointers() $this->assertSame(['/one', '/two'], $items->getJsonPointers()); } + + public function testRecursiveIteration() + { + $items = Items::fromString('[[":)"]]', ['recursive' => true]); + + foreach ($items as $emojis) { + $this->assertInstanceOf(Traversable::class, $emojis); + foreach ($emojis as $emoji) { + $this->assertSame(":)", $emoji); + } + } + } } From 64b5f2315763ed299195c97f28acb06fe37ae8c8 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 20:09:14 +0200 Subject: [PATCH 04/17] Documentation --- CHANGELOG.md | 3 ++- README.md | 61 ++++++++++++++++++++++++++++++++++------------------ 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a275597..50130f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## master -Nothing yet +### Added +- Recursive iteration via `recursive` option.
diff --git a/README.md b/README.md index 2a42cca..9b3957a 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ for PHP >=7.0. See [TL;DR](#tl-dr). No dependencies in production except optiona + [Parsing nested values in arrays](#parsing-nested-values) + [Parsing a single scalar value](#getting-scalar-values) + [Parsing multiple subtrees](#parsing-multiple-subtrees) + + [Recursive iteration](#recursive) + [What is JSON Pointer anyway?](#json-pointer) * [Options](#options) * [Parsing streaming responses from a JSON API](#parsing-json-stream-api-responses) @@ -320,6 +321,39 @@ foreach ($fruits as $key => $value) { } ``` + +### Recursive iteration (BETA) +Recursive iteration can be enabled via `recursive` option set to `true`. +Every JSON iterable that JSON Machine encounters will then be yielded as a lazy instance of `Traversable`. +No JSON vector will be materialized and kept in memory. +The only PHP values you get materialized will be scalar values. +Let's see an example with many, many users with many, many friends + +```php + true]); +foreach ($users as $user) { // $user instanceof Traversable, not an array/object + foreach ($user as $userField => $userValue) { + if ($userField == 'friends') { + foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object + foreach ($user as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object + // do whatever you want here + // maybe rather use PHP's Recursive*Iterators + } + } + } + } +} +``` + +> You **MUST** iterate such lazy `Traversable`s in real time. +> **NEVER** skip an iteration of such `Traversable` and +> **NEVER** keep references to such past `Traversable`s to iterate them later +> or you end up (almost) like [this guy](https://xkcd.com/292/). + ### What is JSON Pointer anyway? It's a way of addressing one item in JSON document. See the [JSON Pointer RFC 6901](https://tools.ietf.org/html/rfc6901). 
@@ -347,6 +381,7 @@ Some examples: Options may change how a JSON is parsed. Array of options is the second parameter of all `Items::from*` functions. Available options are: - `pointer` - A JSON Pointer string that tells which part of the document you want to iterate. +- `recursive` - Bool. Any JSON array/object the parser hits will not be decoded but served lazily as a `Traversable`. Default `false`. - `decoder` - An instance of `ItemDecoder` interface. - `debug` - `true` or `false` to enable or disable the debug mode. When the debug mode is enabled, data such as line, column and position in the document are available during parsing or in exceptions. Keeping debug disabled adds slight @@ -518,30 +553,14 @@ but you forgot to specify a JSON Pointer. See [Parsing a subtree](#parsing-a-sub ### "That didn't help" The other reason may be, that one of the items you iterate is itself so huge it cannot be decoded at once. For example, you iterate over users and one of them has thousands of "friend" objects in it. -Use `PassThruDecoder` which does not decode an item, get the json string of the user -and parse it iteratively yourself using `Items::fromString()`. - -```php - new PassThruDecoder]); -foreach ($users as $user) { - foreach (Items::fromString($user, ['pointer' => "/friends"]) as $friend) { - // process friends one by one - } -} -``` +The most efficient solution is to set `recursive` option to `true`. +See [Recursive iteration](#recursive). ### "I am still out of luck" -It probably means that the JSON string `$user` itself or one of the friends are too big and do not fit in memory. -However, you can try this approach recursively. Parse `"/friends"` with `PassThruDecoder` getting one `$friend` -json string at a time and then parse that using `Items::fromString()`... If even that does not help, -there's probably no solution yet via JSON Machine. 
A feature is planned which will enable you to iterate -any structure fully recursively and strings will be served as streams. +It probably means that a single JSON string itself is too big to fit in memory. +For example, a very big file encoded as base64. +In that case you will probably still be out of luck until JSON Machine supports yielding of scalar values as PHP streams. ## Installation From aba5205393029e002d81dbafef4a0022d17157e6 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 20:20:17 +0200 Subject: [PATCH 05/17] Build fixed --- src/JsonDecoder/StringOnlyDecoder.php | 2 -- src/Parser.php | 4 ++-- test/JsonMachineTest/ItemsTest.php | 2 +- test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php | 3 ++- test/JsonMachineTest/ParserTest.php | 3 +-- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/JsonDecoder/StringOnlyDecoder.php b/src/JsonDecoder/StringOnlyDecoder.php index 1702008..ebdf544 100644 --- a/src/JsonDecoder/StringOnlyDecoder.php +++ b/src/JsonDecoder/StringOnlyDecoder.php @@ -4,8 +4,6 @@ namespace JsonMachine\JsonDecoder; -use JsonMachine\Parser; - class StringOnlyDecoder implements ItemDecoder { /** @var ItemDecoder */ diff --git a/src/Parser.php b/src/Parser.php index 30d7ac2..89f15ad 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -55,7 +55,7 @@ class Parser implements \IteratorAggregate, PositionAware private $hasSingleJsonPointer; /** @var bool */ - private bool $recursive; + private $recursive; /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 @@ -285,7 +285,7 @@ public function getIterator() private function remainingTokens() { /** @var \Iterator $iterator */ - $iterator = $this->tokens instanceOf IteratorAggregate ? $this->tokens->getIterator() : $this->tokens; + $iterator = $this->tokens instanceof IteratorAggregate ?
$this->tokens->getIterator() : $this->tokens; while ($iterator->valid()) { yield $iterator->current(); $iterator->next(); diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index 6ecb0ca..842c13c 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -148,7 +148,7 @@ public function testRecursiveIteration() foreach ($items as $emojis) { $this->assertInstanceOf(Traversable::class, $emojis); foreach ($emojis as $emoji) { - $this->assertSame(":)", $emoji); + $this->assertSame(':)', $emoji); } } } diff --git a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php index 35946de..5d7812e 100644 --- a/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php +++ b/test/JsonMachineTest/JsonDecoder/StringOnlyDecoderTest.php @@ -1,9 +1,10 @@ assertInstanceOf(Traversable::class, $object); foreach ($object as $key => $values) { $this->assertInstanceOf(Traversable::class, $values); - $this->assertSame("numbers", $key); + $this->assertSame('numbers', $key); foreach ($values as $fortyTwo) { $this->assertSame(42, $fortyTwo); } From ccd4a7d39084e485ccb611e5a4a42319b44f6a8f Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:05:47 +0200 Subject: [PATCH 06/17] Tokens reverted. 
Iterator memoization moved from Tokens to Parser --- src/Parser.php | 23 ++++++++++++++++------- src/Tokens.php | 15 --------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 89f15ad..fb7f5b1 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,6 +4,8 @@ namespace JsonMachine; +use Generator; +use Iterator; use IteratorAggregate; use JsonMachine\Exception\InvalidArgumentException; use JsonMachine\Exception\JsonMachineException; @@ -36,6 +38,9 @@ class Parser implements \IteratorAggregate, PositionAware /** @var Traversable */ private $tokens; + /** @var Iterator */ + private $tokensIterator; + /** @var ItemDecoder */ private $jsonDecoder; @@ -59,7 +64,7 @@ class Parser implements \IteratorAggregate, PositionAware /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 - * @param ItemDecoder $jsonDecoder + * @param bool $recursive * * @throws InvalidArgumentException */ @@ -68,6 +73,8 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; + $this->tokensIterator = $tokens instanceof IteratorAggregate ? 
$tokens->getIterator() : $tokens; + $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); if ($recursive) { $this->jsonDecoder = new StringOnlyDecoder($this->jsonDecoder); @@ -86,7 +93,7 @@ private function buildPaths(array $jsonPointers): array } /** - * @return \Generator + * @return Generator * * @throws PathNotFoundException */ @@ -114,7 +121,7 @@ public function getIterator() $iteratorLevel = 0; // local variables for faster name lookups - $tokens = $this->tokens; + $tokens = $this->tokensIterator; foreach ($tokens as $token) { if ($currentPathChanged) { @@ -280,12 +287,11 @@ public function getIterator() } /** - * @return void + * @return Generator */ private function remainingTokens() { - /** @var \Iterator $iterator */ - $iterator = $this->tokens instanceof IteratorAggregate ? $this->tokens->getIterator() : $this->tokens; + $iterator = $this->tokensIterator; while ($iterator->valid()) { yield $iterator->current(); $iterator->next(); @@ -376,7 +382,10 @@ public function getMatchedJsonPointer(): string */ private function error($msg, $token, $exception = SyntaxErrorException::class) { - throw new $exception($msg." '".$token."'", method_exists($this->tokens, 'getPosition') ? $this->tokens->getPosition() : ''); + throw new $exception( + $msg." '".$token."'", + $this->tokens instanceof PositionAware ? $this->tokens->getPosition() : '' + ); } /** diff --git a/src/Tokens.php b/src/Tokens.php index 8ac8a31..2ba96c8 100644 --- a/src/Tokens.php +++ b/src/Tokens.php @@ -11,9 +11,6 @@ class Tokens implements \IteratorAggregate, PositionAware /** @var iterable */ private $jsonChunks; - /** @var Generator */ - private $generator; - /** * @param iterable $jsonChunks */ @@ -27,18 +24,6 @@ public function __construct($jsonChunks) */ #[\ReturnTypeWillChange] public function getIterator() - { - if ( ! 
$this->generator) { - $this->generator = $this->innerGenerator(); - } - - return $this->generator; - } - - /** - * @return Generator - */ - public function innerGenerator() { $insignificantBytes = $this->insignificantBytes(); $tokenBoundaries = $this->tokenBoundaries(); From adcfcde790e83a5399251ddac042e3fbb4a2cac4 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:19:24 +0200 Subject: [PATCH 07/17] Removed useless condition --- src/Parser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index fb7f5b1..e3e00fd 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -233,7 +233,7 @@ public function getIterator() $expectedType = 96; // 96 = self::AFTER_ARRAY_VALUE; } } - if ($currentLevel > $iteratorLevel && ! $this->recursive) { + if ($currentLevel > $iteratorLevel) { continue; // a valid json chunk is not completed yet } if ($jsonBuffer !== '') { From d40b0408e6295f6a656f91ed7278e5e26818e33c Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sat, 22 Oct 2022 22:22:56 +0200 Subject: [PATCH 08/17] $jsonBuffer -> $jsonValue --- src/Parser.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index e3e00fd..f1814a8 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -109,7 +109,7 @@ public function getIterator() $pointersFound = []; $currentLevel = -1; $stack = [$currentLevel => null]; - $jsonBuffer = ''; + $jsonValue = ''; $key = null; $objectKeyExpected = false; $inObject = true; // hack to make "!$inObject" in first iteration work. Better code structure? 
@@ -158,10 +158,10 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonBuffer = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $jsonValue = new self($this->remainingTokens(), '', $this->jsonDecoder, true); $token = ' '; } else { - $jsonBuffer .= $token; + $jsonValue .= $token; } } // todo move this switch to the top just after the syntax check to be a correct FSM @@ -236,9 +236,9 @@ public function getIterator() if ($currentLevel > $iteratorLevel) { continue; // a valid json chunk is not completed yet } - if ($jsonBuffer !== '') { - $valueResult = $this->jsonDecoder->decode($jsonBuffer); - $jsonBuffer = ''; + if ($jsonValue !== '') { + $valueResult = $this->jsonDecoder->decode($jsonValue); + $jsonValue = ''; if ( ! $valueResult->isOk()) { $this->error($valueResult->getErrorMessage(), $token); } From bda567afdedfa4a4acc04a12302a30c00281892d Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Sun, 23 Oct 2022 22:11:25 +0200 Subject: [PATCH 09/17] Finishing of an unfinished sub-iterator for convenience --- README.md | 7 +++---- src/Parser.php | 20 +++++++++++++++++--- test/JsonMachineTest/ParserTest.php | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9b3957a..75e084e 100644 --- a/README.md +++ b/README.md @@ -349,10 +349,9 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object } ``` -> You **MUST** iterate such lazy `Traversable`s in real time. -> **NEVER** skip an iteration of such `Traversable` and -> **NEVER** keep references to such past `Traversable`s to iterate them later -> or you end up (almost) like [this guy](https://xkcd.com/292/). +> If you skip iteration of such lazy deeper-level `Traversable` and advance to a next value, you will not be able to iterate it later. +> JSON Machine must iterate it in the background to be able to read the next value.
+> Such an attempt will result in closed generator exception. ### What is JSON Pointer anyway? diff --git a/src/Parser.php b/src/Parser.php index f1814a8..8b1bef0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -158,7 +158,7 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = new self($this->remainingTokens(), '', $this->jsonDecoder, true); + $jsonValue = (new self($this->remainingTokens(), '', $this->jsonDecoder, true))->getIterator(); $token = ' '; } else { $jsonValue .= $token; @@ -242,17 +242,22 @@ public function getIterator() if ( ! $valueResult->isOk()) { $this->error($valueResult->getErrorMessage(), $token); } + $value = $valueResult->getValue(); if ($iteratorStruct == '[') { - yield $valueResult->getValue(); + yield $value; } else { $keyResult = $this->jsonDecoder->decode($key); if ( ! $keyResult->isOk()) { $this->error($keyResult->getErrorMessage(), $key); } - yield $keyResult->getValue() => $valueResult->getValue(); + yield $keyResult->getValue() => $value; unset($keyResult); } + if ($value instanceof Iterator && $value->valid()) { + $this->eatAllRemainingTokens($value); + } unset($valueResult); + unset($value); } if ( ! 
array_diff($jsonPointerPath, $currentPath) @@ -298,6 +303,15 @@ private function remainingTokens() } } + private function eatAllRemainingTokens(Iterator $iterator) + { + foreach ($iterator as $item) { + if ($item instanceof Iterator) { + $this->eatAllRemainingTokens($item); + } + } + } + private function tokenTypes() { $allBytes = []; diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index fb7e782..a0d7722 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -567,4 +567,21 @@ public function testZigZagRecursiveIteration() $this->assertSame($objectKeysToVisit, $objectKeysVisited); $this->assertSame($valuesToVisit, $valuesVisited); } + + public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEndByUser() + { + $iterator = new Parser(new Tokens(['[1,[{},2,3],4]']), '', null, true); + $array = []; + + foreach ($iterator as $item) { + $array[] = $item; + } + + $this->assertSame(1, $array[0]); + $this->assertInstanceOf(Traversable::class, $array[1]); + $this->assertSame(4, $array[2]); + + $this->expectExceptionMessage('generator'); + iterator_to_array($array[1]); + } } From da88e4da50ae9d2b6f809b5b942c10ff498ed3a3 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 24 Oct 2022 13:29:32 +0200 Subject: [PATCH 10/17] Readme fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 75e084e..c252163 100644 --- a/README.md +++ b/README.md @@ -339,7 +339,7 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object foreach ($user as $userField => $userValue) { if ($userField == 'friends') { foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object - foreach ($user as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object + foreach ($friend as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object // do 
whatever you want here // maybe rather use PHP's Recursive*Iterators } From b22c24d33055fb5e26416758fc1b1b42badfebcb Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 24 Oct 2022 22:45:47 +0200 Subject: [PATCH 11/17] NestedIterator skeleton --- src/NestedIterator.php | 68 +++++++++++++++++++ test/JsonMachineTest/NestedIteratorTest.php | 75 +++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 src/NestedIterator.php create mode 100644 test/JsonMachineTest/NestedIteratorTest.php diff --git a/src/NestedIterator.php b/src/NestedIterator.php new file mode 100644 index 0000000..ce81754 --- /dev/null +++ b/src/NestedIterator.php @@ -0,0 +1,68 @@ +iterator = $iterator; + } + + #[\ReturnTypeWillChange] + public function current() + { + return $this->iterator->current(); + } + + #[\ReturnTypeWillChange] + public function next() + { + return $this->iterator->next(); + } + + #[\ReturnTypeWillChange] + public function key() + { + return $this->iterator->key(); + } + + #[\ReturnTypeWillChange] + public function valid() + { + return $this->iterator->valid(); + } + + #[\ReturnTypeWillChange] + public function rewind() + { + $this->iterator->rewind(); + } + + #[\ReturnTypeWillChange] + public function hasChildren() + { + return $this->iterator->current() instanceof Iterator; + } + + #[\ReturnTypeWillChange] + public function getChildren() + { + return $this->hasChildren() ? 
new self($this->current()) : null; + } + + public function advanceTo($key) + { + } + + public function materialize() + { + } +} diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php new file mode 100644 index 0000000..1fcbd0c --- /dev/null +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -0,0 +1,75 @@ +assertSame([1, 2, 3], $result); + } + + public function testHasChildrenIgnoresArrays() + { + $generator = function () {yield from [1, [2], 3]; }; + $iterator = new NestedIterator($generator()); + + foreach ($iterator as $item) { + $this->assertFalse($iterator->hasChildren()); + } + } + + public function testHasChildrenFollowsIterators() + { + $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; + $iterator = new NestedIterator($generator()); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->hasChildren(); + } + + $this->assertSame([false, true, false], $result); + } + + public function testGetChildrenReturnsNestedIterator() + { + $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; + $iterator = new NestedIterator($generator()); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->getChildren(); + } + + $this->assertSame(null, $result[0]); + $this->assertInstanceOf(NestedIterator::class, $result[1]); + $this->assertSame(null, $result[2]); + } + + public function testGetChildrenReturnsCorrectItems() + { + $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; + $iterator = new RecursiveIteratorIterator( + new NestedIterator($generator()) + ); + + $result = iterator_to_array($iterator, false); + + $this->assertSame([1, 2, 3], $result); + } +} From 3b3129bf14b330f08a81dda005237dc095684296 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Wed, 26 Oct 2022 23:21:02 +0200 Subject: [PATCH 12/17] advanceToKey() --- src/NestedIterator.php | 16 ++++++++++++++-- test/JsonMachineTest/NestedIteratorTest.php | 20 
++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index ce81754..59f7165 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -5,6 +5,7 @@ namespace JsonMachine; use Iterator; +use JsonMachine\Exception\JsonMachineException; class NestedIterator implements \RecursiveIterator { @@ -58,11 +59,22 @@ public function getChildren() return $this->hasChildren() ? new self($this->current()) : null; } - public function advanceTo($key) + public function advanceToKey($key) { + $iterator = $this->iterator; + + while ($key !== $iterator->key() && $iterator->valid()) { + $iterator->next(); + } + + if ($key !== $iterator->key()) { + throw new JsonMachineException("Key '$key' was not found."); + } + + return $iterator->current(); } - public function materialize() + public function toArray() { } } diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 1fcbd0c..69ffbd4 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -72,4 +72,24 @@ public function testGetChildrenReturnsCorrectItems() $this->assertSame([1, 2, 3], $result); } + + public function testAdvanceToKeyWorks() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new NestedIterator($generator()); + + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(1, $iterator->advanceToKey('one')); + $this->assertSame(2, $iterator->advanceToKey('two')); + $this->assertSame(3, $iterator->advanceToKey('three')); + } + + public function testAdvanceToKeyThrows() + { + $generator = function () {yield from ['one' => 1, 'two' => 2, 'three' => 3]; }; + $iterator = new NestedIterator($generator()); + + $this->expectExceptionMessage('not found'); + $iterator->advanceToKey('four'); + } } From 55b26de19d7276c37320faed24984b50a0bee74b Mon Sep 17 00:00:00 2001 From: Filip 
Halaxa Date: Thu, 27 Oct 2022 14:56:12 +0200 Subject: [PATCH 13/17] toArray() --- src/NestedIterator.php | 16 ++++++++++++++- test/JsonMachineTest/NestedIteratorTest.php | 22 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index 59f7165..b132a4b 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -74,7 +74,21 @@ public function advanceToKey($key) return $iterator->current(); } - public function toArray() + public function toArray(): array { + return self::toArrayRecursive($this); + } + + private static function toArrayRecursive(\Traversable $traversable): array + { + $array = []; + foreach ($traversable as $key => $value) { + if ($value instanceof \Traversable) { + $value = self::toArrayRecursive($value); + } + $array[$key] = $value; + } + + return $array; } } diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 69ffbd4..891a4e3 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -4,6 +4,8 @@ namespace JsonMachineTest; +use Generator; +use Iterator; use JsonMachine\NestedIterator; use PHPUnit\Framework\TestCase; use RecursiveIteratorIterator; @@ -92,4 +94,24 @@ public function testAdvanceToKeyThrows() $this->expectExceptionMessage('not found'); $iterator->advanceToKey('four'); } + + public function testToArray() + { + $generator = function ($iterable) {yield from ['one' => 1, 'two' => 2, 'i' => $iterable, 'three' => 3]; }; + $iterator = new NestedIterator($generator($generator(['42']))); + + $expected = [ + 'one' => 1, + 'two' => 2, + 'i' => [ + 'one' => 1, + 'two' => 2, + 'i' => ['42'], + 'three' => 3 + ], + 'three' => 3 + ]; + + $this->assertSame($expected, $iterator->toArray()); + } } From fd46d467bd2a54e2a3e00c55a7be6434dae56f50 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 30 Nov 2023 17:41:20 +0100 Subject: [PATCH 14/17] PHPStan fixes + 
testRecursiveIterationYieldsNestedIterator --- src/NestedIterator.php | 2 +- src/Parser.php | 20 ++++++++++++++++---- test/JsonMachineTest/NestedIteratorTest.php | 6 ++---- test/JsonMachineTest/ParserTest.php | 10 ++++++++++ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/NestedIterator.php b/src/NestedIterator.php index b132a4b..a40b76f 100644 --- a/src/NestedIterator.php +++ b/src/NestedIterator.php @@ -26,7 +26,7 @@ public function current() #[\ReturnTypeWillChange] public function next() { - return $this->iterator->next(); + $this->iterator->next(); } #[\ReturnTypeWillChange] diff --git a/src/Parser.php b/src/Parser.php index fbc3595..d93085d 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -38,7 +38,7 @@ class Parser implements \IteratorAggregate, PositionAware /** @var Traversable */ private $tokens; - /** @var Iterator */ + /** @var Iterator */ private $tokensIterator; /** @var ItemDecoder */ @@ -64,7 +64,6 @@ class Parser implements \IteratorAggregate, PositionAware /** * @param array|string $jsonPointer Follows json pointer RFC https://tools.ietf.org/html/rfc6901 - * @param bool $recursive * * @throws InvalidArgumentException */ @@ -73,7 +72,13 @@ public function __construct(Traversable $tokens, $jsonPointer = '', ItemDecoder $jsonPointers = (new ValidJsonPointers((array) $jsonPointer))->toArray(); $this->tokens = $tokens; - $this->tokensIterator = $tokens instanceof IteratorAggregate ? 
$tokens->getIterator() : $tokens; + if ($tokens instanceof IteratorAggregate) { + $this->tokensIterator = $tokens->getIterator(); + } elseif ($tokens instanceof Iterator) { + $this->tokensIterator = $tokens; + } else { + throw new InvalidArgumentException('$tokens must be either an instance of Iterator or IteratorAggregate.'); + } $this->jsonDecoder = $jsonDecoder ?: new ExtJsonDecoder(); if ($recursive) { @@ -158,7 +163,14 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = (new self($this->remainingTokens(), '', $this->jsonDecoder, true))->getIterator(); + $jsonValue = new NestedIterator( + (new self( + $this->remainingTokens(), + '', + $this->jsonDecoder, + true + ))->getIterator() + ); $token = ' '; } else { $jsonValue .= $token; diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index 891a4e3..faeffa9 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -4,8 +4,6 @@ namespace JsonMachineTest; -use Generator; -use Iterator; use JsonMachine\NestedIterator; use PHPUnit\Framework\TestCase; use RecursiveIteratorIterator; @@ -107,9 +105,9 @@ public function testToArray() 'one' => 1, 'two' => 2, 'i' => ['42'], - 'three' => 3 + 'three' => 3, ], - 'three' => 3 + 'three' => 3, ]; $this->assertSame($expected, $iterator->toArray()); diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index 62983c4..b14b342 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -9,6 +9,7 @@ use JsonMachine\Exception\SyntaxErrorException; use JsonMachine\Exception\UnexpectedEndSyntaxErrorException; use JsonMachine\JsonDecoder\ExtJsonDecoder; +use JsonMachine\NestedIterator; use JsonMachine\Parser; use JsonMachine\StringChunks; use JsonMachine\Tokens; @@ -589,4 +590,13 @@ public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEnd 
$this->expectExceptionMessage('generator'); iterator_to_array($array[1]); } + + public function testRecursiveIterationYieldsNestedIterator() + { + $iterator = new Parser(new Tokens(['[[1]]']), '', null, true); + + foreach ($iterator as $item) { + $this->assertInstanceOf(NestedIterator::class, $item); + } + } } From 01fc4347027493cf7bf5ac4ae908a1fb60d985e7 Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Thu, 30 Nov 2023 18:47:04 +0100 Subject: [PATCH 15/17] Readme update --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 65168d3..ee57db5 100644 --- a/README.md +++ b/README.md @@ -322,8 +322,8 @@ foreach ($fruits as $key => $value) { ### Recursive iteration (BETA) Recursive iteration can be enabled via `recursive` option set to `true`. -Every JSON iterable that JSON Machine encounters will then be yielded as a lazy instance of `Traversable`. -No JSON vector will be materialized and kept in memory. +Every JSON iterable that JSON Machine encounters will then be yielded as an instance of `NestedIterator`. +No JSON array or object will be materialized and kept in memory. The only PHP values you get materialized will be scalar values. 
Let's see an example with many, many users with many, many friends @@ -335,11 +335,10 @@ use JsonMachine\Items; $users = Items::fromFile('users.json', ['recursive' => true]); foreach ($users as $user) { // $user instanceof Traversable, not an array/object foreach ($user as $userField => $userValue) { - if ($userField == 'friends') { + if ($userField === 'friends') { foreach ($userValue as $friend) { // $userValue instanceof Traversable, not an array/object foreach ($friend as $friendField => $friendValue) { // $friend instanceof Traversable, not an array/object // do whatever you want here - // maybe rather use PHP's Recursive*Iterators } } } @@ -347,7 +346,8 @@ foreach ($users as $user) { // $user instanceof Traversable, not an array/object } ``` -> If you skip iteration of such lazy deeper-level `Traversable` and advance to a next value, you will not be able to iterate it later. +> If you break out of the iteration of such a lazy deeper-level `Traversable` (i.e. you skip some `"friends"` via `break`) +> and advance to the next value (i.e. the next `user`), you will not be able to iterate it later. > JSON Machine must iterate it the background to be able to read next value. > Such an attempt will result in closed generator exception. @@ -555,8 +555,8 @@ See [Recursive iteration](#recursive). ### "I am still out of luck" -It probably means that a single JSON string itself is too big to fit in memory. -For example very big file encoded as base64. +It probably means that a single JSON scalar string itself is too big to fit in memory. +For example, a very big base64-encoded file. In that case you will probably be still out of luck until JSON Machine supports yielding of scalar values as PHP streams.
From 754d3609c08de7c9a2d09df3187ed4454dde9eee Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Fri, 1 Dec 2023 13:21:20 +0100 Subject: [PATCH 16/17] RecursiveItems facade --- src/FacadeTrait.php | 168 +++++++++++++++++++++++++++++ src/Items.php | 154 +------------------------- src/RecursiveItems.php | 18 ++++ test/JsonMachineTest/ItemsTest.php | 14 +-- 4 files changed, 190 insertions(+), 164 deletions(-) create mode 100644 src/FacadeTrait.php create mode 100644 src/RecursiveItems.php diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php new file mode 100644 index 0000000..dbd8057 --- /dev/null +++ b/src/FacadeTrait.php @@ -0,0 +1,168 @@ +chunks = $bytesIterator; + $this->jsonPointer = $options['pointer']; + $this->jsonDecoder = $options['decoder']; + $this->debugEnabled = $options['debug']; + + if ($this->debugEnabled) { + $tokensClass = TokensWithDebugging::class; + } else { + $tokensClass = Tokens::class; + } + + $this->parser = new Parser( + new $tokensClass( + $this->chunks + ), + $this->jsonPointer, + $this->jsonDecoder ?: new ExtJsonDecoder(), + $this->recursive() + ); + } + + /** + * @param string $string + * + * @return self + * + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []) + { + return new self(new StringChunks($string), $options); + } + + /** + * @param string $file + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromFile($file, array $options = []) + { + return new self(new FileChunks($file), $options); + } + + /** + * @param resource $stream + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromStream($stream, array $options = []) + { + return new self(new StreamChunks($stream), $options); + } + + /** + * @param iterable $iterable + * + * @return self + * + * @throws Exception\InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []) + { + 
return new self($iterable, $options); + } + + /** + * @return \Generator + * + * @throws Exception\PathNotFoundException + */ + #[\ReturnTypeWillChange] + public function getIterator() + { + return $this->parser->getIterator(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getPosition() + { + return $this->parser->getPosition(); + } + + public function getJsonPointers(): array + { + return $this->parser->getJsonPointers(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getCurrentJsonPointer(): string + { + return $this->parser->getCurrentJsonPointer(); + } + + /** + * @throws Exception\JsonMachineException + */ + public function getMatchedJsonPointer(): string + { + return $this->parser->getMatchedJsonPointer(); + } + + /** + * @return bool + */ + public function isDebugEnabled() + { + return $this->debugEnabled; + } +} diff --git a/src/Items.php b/src/Items.php index 37f2d10..5749f8a 100644 --- a/src/Items.php +++ b/src/Items.php @@ -4,163 +4,15 @@ namespace JsonMachine; -use JsonMachine\Exception\InvalidArgumentException; -use JsonMachine\JsonDecoder\ExtJsonDecoder; -use JsonMachine\JsonDecoder\ItemDecoder; - /** * Entry-point facade for JSON Machine. 
*/ final class Items implements \IteratorAggregate, PositionAware { - /** - * @var iterable - */ - private $chunks; - - /** - * @var string - */ - private $jsonPointer; - - /** - * @var ItemDecoder|null - */ - private $jsonDecoder; - - /** - * @var Parser - */ - private $parser; - - /** - * @var bool - */ - private $debugEnabled; - - /** - * @param iterable $bytesIterator - * - * @throws InvalidArgumentException - */ - public function __construct($bytesIterator, array $options = []) - { - $options = new ItemsOptions($options); - - $this->chunks = $bytesIterator; - $this->jsonPointer = $options['pointer']; - $this->jsonDecoder = $options['decoder']; - $this->debugEnabled = $options['debug']; - - if ($this->debugEnabled) { - $tokensClass = TokensWithDebugging::class; - } else { - $tokensClass = Tokens::class; - } - - $this->parser = new Parser( - new $tokensClass( - $this->chunks - ), - $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder(), - $options['recursive'] - ); - } - - /** - * @param string $string - * - * @return self - * - * @throws InvalidArgumentException - */ - public static function fromString($string, array $options = []) - { - return new self(new StringChunks($string), $options); - } - - /** - * @param string $file - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromFile($file, array $options = []) - { - return new self(new FileChunks($file), $options); - } - - /** - * @param resource $stream - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromStream($stream, array $options = []) - { - return new self(new StreamChunks($stream), $options); - } - - /** - * @param iterable $iterable - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromIterable($iterable, array $options = []) - { - return new self($iterable, $options); - } - - /** - * @return \Generator - * - * @throws 
Exception\PathNotFoundException - */ - #[\ReturnTypeWillChange] - public function getIterator() - { - return $this->parser->getIterator(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getPosition() - { - return $this->parser->getPosition(); - } - - public function getJsonPointers(): array - { - return $this->parser->getJsonPointers(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getCurrentJsonPointer(): string - { - return $this->parser->getCurrentJsonPointer(); - } - - /** - * @throws Exception\JsonMachineException - */ - public function getMatchedJsonPointer(): string - { - return $this->parser->getMatchedJsonPointer(); - } + use FacadeTrait; - /** - * @return bool - */ - public function isDebugEnabled() + protected function recursive(): bool { - return $this->debugEnabled; + return false; } } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php new file mode 100644 index 0000000..3d470ce --- /dev/null +++ b/src/RecursiveItems.php @@ -0,0 +1,18 @@ +assertSame(3, iterator_count($items)); } - - public function testRecursiveIteration() - { - $items = Items::fromString('[[":)"]]', ['recursive' => true]); - - foreach ($items as $emojis) { - $this->assertInstanceOf(Traversable::class, $emojis); - foreach ($emojis as $emoji) { - $this->assertSame(':)', $emoji); - } - } - } } From 860cda29422b83693d5a0f690b76954d815f6a2d Mon Sep 17 00:00:00 2001 From: Filip Halaxa Date: Mon, 26 Aug 2024 21:43:32 +0200 Subject: [PATCH 17/17] wip --- src/FacadeTrait.php | 126 +++++-------------- src/Items.php | 65 +++++++++- src/Parser.php | 16 ++- src/RecursiveItems.php | 114 ++++++++++++++++- test/JsonMachineTest/ItemsTest.php | 1 - test/JsonMachineTest/NestedIteratorTest.php | 15 --- test/JsonMachineTest/ParserTest.php | 9 -- test/JsonMachineTest/RecursiveItemsTest.json | 1 + test/JsonMachineTest/RecursiveItemsTest.php | 75 +++++++++++ 9 files changed, 286 insertions(+), 136 deletions(-) create mode 100644 
test/JsonMachineTest/RecursiveItemsTest.json create mode 100644 test/JsonMachineTest/RecursiveItemsTest.php diff --git a/src/FacadeTrait.php b/src/FacadeTrait.php index dbd8057..412e984 100644 --- a/src/FacadeTrait.php +++ b/src/FacadeTrait.php @@ -10,21 +10,6 @@ trait FacadeTrait { - /** - * @var iterable - */ - private $chunks; - - /** - * @var string - */ - private $jsonPointer; - - /** - * @var ItemDecoder|null - */ - private $jsonDecoder; - /** * @var Parser */ @@ -35,100 +20,34 @@ trait FacadeTrait */ private $debugEnabled; - /** - * @todo Make private when PHP 7 stops being supported - */ - protected abstract function recursive(): bool; + public function isDebugEnabled(): bool + { + return $this->debugEnabled; + } /** * @param iterable $bytesIterator * * @throws InvalidArgumentException */ - public function __construct($bytesIterator, array $options = []) + private static function createParser($bytesIterator, ItemsOptions $options, bool $recursive): Parser { - $options = new ItemsOptions($options); - - $this->chunks = $bytesIterator; - $this->jsonPointer = $options['pointer']; - $this->jsonDecoder = $options['decoder']; - $this->debugEnabled = $options['debug']; - - if ($this->debugEnabled) { + if ($options['debug']) { $tokensClass = TokensWithDebugging::class; } else { $tokensClass = Tokens::class; } - $this->parser = new Parser( + return new Parser( new $tokensClass( - $this->chunks + $bytesIterator ), - $this->jsonPointer, - $this->jsonDecoder ?: new ExtJsonDecoder(), - $this->recursive() + $options['pointer'], + $options['decoder'] ?: new ExtJsonDecoder(), + $recursive ); } - /** - * @param string $string - * - * @return self - * - * @throws InvalidArgumentException - */ - public static function fromString($string, array $options = []) - { - return new self(new StringChunks($string), $options); - } - - /** - * @param string $file - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromFile($file, array 
$options = []) - { - return new self(new FileChunks($file), $options); - } - - /** - * @param resource $stream - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromStream($stream, array $options = []) - { - return new self(new StreamChunks($stream), $options); - } - - /** - * @param iterable $iterable - * - * @return self - * - * @throws Exception\InvalidArgumentException - */ - public static function fromIterable($iterable, array $options = []) - { - return new self($iterable, $options); - } - - /** - * @return \Generator - * - * @throws Exception\PathNotFoundException - */ - #[\ReturnTypeWillChange] - public function getIterator() - { - return $this->parser->getIterator(); - } - /** * @throws Exception\JsonMachineException */ @@ -159,10 +78,23 @@ public function getMatchedJsonPointer(): string } /** - * @return bool + * @param string $string */ - public function isDebugEnabled() - { - return $this->debugEnabled; - } + abstract public static function fromString($string, array $options = []): self; + + /** + * @param string $file + */ + abstract public static function fromFile($file, array $options = []): self; + + /** + * @param resource $stream + */ + abstract public static function fromStream($stream, array $options = []): self; + + /** + * @param iterable $iterable + */ + abstract public static function fromIterable($iterable, array $options = []): self; + } diff --git a/src/Items.php b/src/Items.php index 5749f8a..2136223 100644 --- a/src/Items.php +++ b/src/Items.php @@ -4,6 +4,8 @@ namespace JsonMachine; +use JsonMachine\Exception\InvalidArgumentException; + /** * Entry-point facade for JSON Machine. 
*/ @@ -11,8 +13,67 @@ final class Items implements \IteratorAggregate, PositionAware { use FacadeTrait; - protected function recursive(): bool + /** + * @param iterable $bytesIterator + * + * @throws InvalidArgumentException + */ + public function __construct($bytesIterator, array $options = []) + { + $options = new ItemsOptions($options); + $this->debugEnabled = $options['debug']; + + $this->parser = $this->createParser($bytesIterator, $options, false); + } + + /** + * @param string $string + * + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []): self + { + return new self(new StringChunks($string), $options); + } + + /** + * @param string $file + * + * @throws Exception\InvalidArgumentException + */ + public static function fromFile($file, array $options = []): self + { + return new self(new FileChunks($file), $options); + } + + /** + * @param resource $stream + * + * @throws Exception\InvalidArgumentException + */ + public static function fromStream($stream, array $options = []): self + { + return new self(new StreamChunks($stream), $options); + } + + /** + * @param iterable $iterable + * + * @throws Exception\InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []): self + { + return new self($iterable, $options); + } + + /** + * @return \Generator + * + * @throws Exception\PathNotFoundException + */ + #[\ReturnTypeWillChange] + public function getIterator() { - return false; + return $this->parser->getIterator(); } } diff --git a/src/Parser.php b/src/Parser.php index d93085d..80691aa 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -163,14 +163,12 @@ public function getIterator() ) ) { if ($this->recursive && ($token == '{' || $token == '[')) { - $jsonValue = new NestedIterator( - (new self( - $this->remainingTokens(), - '', - $this->jsonDecoder, - true - ))->getIterator() - ); + $jsonValue = (new self( + $this->remainingTokens(), + '', + $this->jsonDecoder, + 
true + ))->$this->getIterator(); $token = ' '; } else { $jsonValue .= $token; @@ -400,7 +398,7 @@ public function getCurrentJsonPointer(): string */ public function getMatchedJsonPointer(): string { - if ($this->matchedJsonPointer === null) { + if ($this->isOutsideGenerator()) { throw new JsonMachineException(__METHOD__.' must be called inside a loop'); } diff --git a/src/RecursiveItems.php b/src/RecursiveItems.php index 3d470ce..f8e1325 100644 --- a/src/RecursiveItems.php +++ b/src/RecursiveItems.php @@ -4,15 +4,123 @@ namespace JsonMachine; +use Iterator; +use JsonMachine\Exception\InvalidArgumentException; + /** * Entry-point facade for recursive iteration. */ -final class RecursiveItems implements \IteratorAggregate, PositionAware +final class RecursiveItems implements \RecursiveIterator, PositionAware { use FacadeTrait; - protected function recursive(): bool + /** @var Parser */ + private $parser; + + /** @var ItemsOptions */ + private $options; + + /** @var Iterator */ + private $parserIterator; + + public function __construct(Parser $parser, ItemsOptions $options) + { + $this->parser = $parser; + $this->options = $options; + $this->debugEnabled = $options['debug']; + } + + /** + * @throws InvalidArgumentException + */ + public static function fromString($string, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser(new StringChunks($string), $options, true), + $options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromFile($file, array $options = []): self { - return true; + $options = new ItemsOptions($options); + return new self( + self::createParser(new FileChunks($file), $options, true), + $options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromStream($stream, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser(new StreamChunks($stream), $options, true), + 
$options + ); + } + + /** + * @throws InvalidArgumentException + */ + public static function fromIterable($iterable, array $options = []): self + { + $options = new ItemsOptions($options); + return new self( + self::createParser($iterable, $options, true), + $options + ); + } + + public function current() + { + $current = $this->parserIterator->current(); + if ($current instanceof Parser) { + return new self($current, $this->options); + } + + return $current; + } + + public function next() + { + $this->parserIterator->next(); + } + + public function key() + { + return $this->parserIterator->key(); + } + + public function valid(): bool + { + return $this->parserIterator->valid(); + } + + public function rewind() + { + $this->parserIterator = $this->parser->getIterator(); + $this->parserIterator->rewind(); + } + + public function hasChildren(): bool + { + return $this->current() instanceof self; + } + + public function getChildren() + { + $current = $this->current(); + if ($current instanceof self) { + return $current; + } + + return null; } } diff --git a/test/JsonMachineTest/ItemsTest.php b/test/JsonMachineTest/ItemsTest.php index b68668a..9c4ab18 100644 --- a/test/JsonMachineTest/ItemsTest.php +++ b/test/JsonMachineTest/ItemsTest.php @@ -9,7 +9,6 @@ /** * @covers \JsonMachine\Items - * @covers \JsonMachine\RecursiveItems */ class ItemsTest extends \PHPUnit_Framework_TestCase { diff --git a/test/JsonMachineTest/NestedIteratorTest.php b/test/JsonMachineTest/NestedIteratorTest.php index faeffa9..e053505 100644 --- a/test/JsonMachineTest/NestedIteratorTest.php +++ b/test/JsonMachineTest/NestedIteratorTest.php @@ -46,21 +46,6 @@ public function testHasChildrenFollowsIterators() $this->assertSame([false, true, false], $result); } - public function testGetChildrenReturnsNestedIterator() - { - $generator = function () {yield from [1, new \ArrayIterator([]), 3]; }; - $iterator = new NestedIterator($generator()); - - $result = []; - foreach ($iterator as $item) { - 
$result[] = $iterator->getChildren(); - } - - $this->assertSame(null, $result[0]); - $this->assertInstanceOf(NestedIterator::class, $result[1]); - $this->assertSame(null, $result[2]); - } - public function testGetChildrenReturnsCorrectItems() { $generator = function () {yield from [1, new \ArrayIterator([2]), 3]; }; diff --git a/test/JsonMachineTest/ParserTest.php b/test/JsonMachineTest/ParserTest.php index b14b342..6a7d117 100644 --- a/test/JsonMachineTest/ParserTest.php +++ b/test/JsonMachineTest/ParserTest.php @@ -590,13 +590,4 @@ public function testRecursiveParserDoesNotRequireChildParserToBeIteratedToTheEnd $this->expectExceptionMessage('generator'); iterator_to_array($array[1]); } - - public function testRecursiveIterationYieldsNestedIterator() - { - $iterator = new Parser(new Tokens(['[[1]]']), '', null, true); - - foreach ($iterator as $item) { - $this->assertInstanceOf(NestedIterator::class, $item); - } - } } diff --git a/test/JsonMachineTest/RecursiveItemsTest.json b/test/JsonMachineTest/RecursiveItemsTest.json new file mode 100644 index 0000000..bfb8d7b --- /dev/null +++ b/test/JsonMachineTest/RecursiveItemsTest.json @@ -0,0 +1 @@ +{"path": {"key":["value"]}} diff --git a/test/JsonMachineTest/RecursiveItemsTest.php b/test/JsonMachineTest/RecursiveItemsTest.php new file mode 100644 index 0000000..2cf678c --- /dev/null +++ b/test/JsonMachineTest/RecursiveItemsTest.php @@ -0,0 +1,75 @@ + $args[1], + 'decoder' => $args[2], + 'debug' => $args[3], + ], + ]); + $this->assertSame($expected, iterator_to_array($iterator)); + } + + public function data_testFactories() + { + foreach ([true, false] as $debug) { + foreach ([ + [RecursiveItems::class, 'fromStream', fopen('data://text/plain,{"path": {"key":["value"]}}', 'r'), '/path', null, $debug], + [RecursiveItems::class, 'fromString', '{"path": {"key":["value"]}}', '/path', null, $debug], + [RecursiveItems::class, 'fromFile', __DIR__.'/RecursiveItemsTest.json', '/path', null, $debug], + [RecursiveItems::class, 
'fromIterable', ['{"path": {"key', '":["value"]}}'], '/path', null, $debug], + [RecursiveItems::class, 'fromIterable', new \ArrayIterator(['{"path": {"key', '":["value"]}}']), '/path', null, $debug], + ] as $case) { + yield $case; + } + } + } + + public function testRecursiveIteration() + { + $items = RecursiveItems::fromString('[[":)"]]'); + + foreach ($items as $emojis) { + $this->assertInstanceOf(RecursiveItems::class, $emojis); + foreach ($emojis as $emoji) { + $this->assertSame(':)', $emoji); + } + } + } + + public function testGetChildrenReturnsNestedIterator() + { + $iterator = RecursiveItems::fromString("[1,[],1]"); + + $result = []; + foreach ($iterator as $item) { + $result[] = $iterator->getChildren(); + } + + $this->assertSame(null, $result[0]); + $this->assertInstanceOf(RecursiveItems::class, $result[1]); + $this->assertSame(null, $result[2]); + } + + public function testCurrentReturnsSameInstanceOfParser() + { + + } +}