Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function from_csv(
?string $separator = null,
?string $enclosure = null,
?string $escape = null,
int $characters_read_in_line = 1000,
int $characters_read_in_line = 10 * 1024 * 1024,
?Schema $schema = null,
): CSVExtractor {
$loader = (new CSVExtractor(is_string($path) ? path_real($path) : $path))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Adapter\CSV\Tests\Double;

use Flow\Filesystem\Path;
use Flow\Filesystem\SourceStream;
use Generator;

use function explode;
use function strlen;
use function substr;

/**
 * Test double for SourceStream that serves a fixed in-memory string.
 *
 * In addition to exposing the given contents, it records the $length argument
 * of every readLines() call in $capturedLengths, so a test can assert which
 * value a reader handed down to the stream.
 */
final class LengthCapturingSourceStream implements SourceStream
{
    /**
     * @var array<null|int> the length argument captured on each readLines() call
     */
    public array $capturedLengths = [];

    /**
     * @param string $contents full contents served by this stream
     * @param Path $path path returned as-is from path()
     */
    public function __construct(
        private readonly string $contents,
        private readonly Path $path,
    ) {}

    /**
     * No-op: this double only holds an in-memory string.
     */
    public function close(): void {}

    /**
     * Returns the whole contents given to the constructor.
     */
    public function content(): string
    {
        return $this->contents;
    }

    /**
     * Always reports the stream as open; close() does not change that.
     */
    public function isOpen(): bool
    {
        return true;
    }

    /**
     * Yields the contents in consecutive chunks of at most $length bytes.
     *
     * The body is a generator, so the length check runs when iteration starts,
     * not at the moment this method is called.
     *
     * @param int $length chunk size in bytes, must be at least 1
     *
     * @throws \InvalidArgumentException when $length is lower than 1
     */
    public function iterate(int $length = 1): Generator
    {
        // A zero or negative step would never move the offset past the end of
        // the contents, so the loop below would never terminate.
        if ($length < 1) {
            throw new \InvalidArgumentException("Chunk length must be greater than 0, got {$length}.");
        }

        $size = strlen($this->contents);

        for ($offset = 0; $offset < $size; $offset += $length) {
            yield substr($this->contents, $offset, $length);
        }
    }

    /**
     * Returns the path given to the constructor.
     */
    public function path(): Path
    {
        return $this->path;
    }

    /**
     * Returns up to $length bytes of the contents starting at byte $offset.
     */
    public function read(int $length, int $offset): string
    {
        return substr($this->contents, $offset, $length);
    }

    /**
     * Records $length, then yields the contents split on $separator.
     *
     * The body is a generator, so $length is appended to $capturedLengths when
     * iteration starts, not at the moment this method is called. $length is
     * only recorded; it does not limit the yielded lines.
     *
     * @param string $separator line separator the contents are split on
     * @param null|int $length value to record in $capturedLengths
     */
    public function readLines(string $separator = "\n", ?int $length = null): Generator
    {
        $this->capturedLengths[] = $length;

        foreach (explode($separator, $this->contents) as $line) {
            yield $line;
        }
    }

    /**
     * Returns the size of the contents in bytes.
     */
    public function size(): int
    {
        return strlen($this->contents);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,33 @@
namespace Flow\ETL\Adapter\CSV\Tests\Unit;

use Flow\ETL\Adapter\CSV\CSVLineReader;
use Flow\ETL\Adapter\CSV\Tests\Double\LengthCapturingSourceStream;
use Flow\Filesystem\Stream\MemorySourceStream;
use PHPUnit\Framework\Attributes\TestWith;

Check warning on line 10 in src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Unit/CSVLineReaderTest.php

View workflow job for this annotation

GitHub Actions / static-analyze / static-analyze (locked, 8.3, ubuntu-latest)

no-redundant-use

Unused import: `TestWith`. >`TestWith` is imported but never used. >Unused `use` statement. Help: Remove the entire `use` statement.
use PHPUnit\Framework\TestCase;

use function Flow\Filesystem\DSL\path;

final class CSVLineReaderTest extends TestCase
{
/**
 * A CSVLineReader built with an explicit second argument (4096) must hand
 * that exact value to the stream's readLines() call.
 */
public function test_characters_read_in_line_is_passed_through_to_the_stream(): void
{
    $source = new LengthCapturingSourceStream("id,name\n1,foo", path('s3://bucket/users.csv'));
    $reader = new CSVLineReader('"', 4096);

    // Consume every line so the stream is actually read before asserting.
    $lines = $reader->readLines($source);
    iterator_to_array($lines);

    static::assertSame([4096], $source->capturedLengths);
}

/**
 * A CSVLineReader built without a second argument must call the stream's
 * readLines() with a null length, leaving the choice to the stream.
 */
public function test_null_characters_read_in_line_lets_the_stream_choose_its_default(): void
{
    $source = new LengthCapturingSourceStream("id,name\n1,foo", path('s3://bucket/users.csv'));
    $reader = new CSVLineReader('"');

    // Consume every line so the stream is actually read before asserting.
    $lines = $reader->readLines($source);
    iterator_to_array($lines);

    static::assertSame([null], $source->capturedLengths);
}

public function test_detection_of_multiline_quotes(): void
{
$simpleContent = "field1,field2\nvalue1,value2";
Expand Down
2 changes: 1 addition & 1 deletion web/landing/resources/dsl.json

Large diffs are not rendered by default.

Loading