Skip to content

Commit

Permalink
Merge e7529ba into 6080327
Browse files Browse the repository at this point in the history
  • Loading branch information
courtney-miles committed Aug 15, 2018
2 parents 6080327 + e7529ba commit 18bd080
Show file tree
Hide file tree
Showing 10 changed files with 335 additions and 1 deletion.
169 changes: 169 additions & 0 deletions src/Extract/CsvFileExtractor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
<?php
/**
* Author: Courtney Miles
* Date: 15/08/18
* Time: 7:08 PM
*/

namespace MilesAsylum\Slurp\Extract;


use MilesAsylum\Slurp\Extract\Exception\MalformedCsvException;

/**
 * Iterates over the rows of a CSV file.
 *
 * When the file has a header row, each row is yielded as an associative array
 * keyed by column name and keys (line numbers) start at 1, the header being
 * line 0. Without a header row, each row is yielded as a plain list and keys
 * start at 0.
 *
 * Every row must carry the same number of values as the header (or as the
 * first row when there is no header); otherwise current() throws a
 * MalformedCsvException.
 */
class CsvFileExtractor implements ExtractorInterface
{
    /**
     * @var string
     */
    protected $filePath;

    /**
     * @var resource
     */
    protected $fileHandle;

    /**
     * @var array Column names read from the header row; empty when there is no header.
     */
    protected $columnNames = [];

    /**
     * @var int|null Number of values in the first data row; null when the file has no data rows.
     */
    protected $firstRowValueCount;

    /**
     * @var bool Whether the first line of the file is a header row.
     */
    private $hasHeader;

    /**
     * @var int
     */
    private $lineLength;

    /**
     * @var string
     */
    private $delimiter;

    /**
     * @var string
     */
    private $enclosure;

    /**
     * @var string
     */
    private $escape;

    /**
     * @var array|false|null The most recently loaded row, as returned by fgetcsv().
     */
    protected $currentLine;

    /**
     * @var int|null Zero-based number of the most recently loaded line; null before any read.
     */
    protected $currentLineNo = null;

    /**
     * CsvFileExtractor constructor.
     *
     * Opens the file, reads the header row (if any) and loads the first data row.
     *
     * @param string $filePath Path of the CSV file to read.
     * @param bool $hasHeader Whether the first line holds the column names.
     * @param int $lineLength Passed to fgetcsv() as the maximum line length.
     * @param string $delimiter
     * @param string $enclosure
     * @param string $escape
     * @throws MalformedCsvException When the header row is missing or contains duplicate names.
     * @throws \RuntimeException When the file cannot be opened for reading.
     */
    public function __construct(
        $filePath,
        $hasHeader,
        $lineLength = 0,
        $delimiter = ',',
        $enclosure = '"',
        $escape = '\\'
    ) {
        $this->filePath = $filePath;
        $this->hasHeader = (bool) $hasHeader;
        $this->lineLength = $lineLength;
        $this->delimiter = $delimiter;
        $this->enclosure = $enclosure;
        $this->escape = $escape;

        $handle = fopen($this->filePath, 'r');

        // Fail early: every later read would otherwise operate on `false`.
        if ($handle === false) {
            throw new \RuntimeException(
                "Unable to open the file {$this->filePath} for reading."
            );
        }

        $this->fileHandle = $handle;

        if ($this->hasHeader) {
            $header = $this->loadCsvLine();

            // fgetcsv() yields false on an empty file; count()/array_unique() must not see that.
            if (!is_array($header)) {
                throw new MalformedCsvException(
                    "The loaded file {$this->filePath} does not contain a header row."
                );
            }

            if (count($header) !== count(array_unique($header))) {
                throw new MalformedCsvException(
                    "The loaded file {$this->filePath} contains duplicate column names."
                );
            }

            $this->columnNames = $header;
        }

        $this->currentLine = $this->loadCsvLine();
        $this->firstRowValueCount = is_array($this->currentLine) ? count($this->currentLine) : null;
    }

    /**
     * Get the column names read from the header row.
     *
     * @return array Empty when the file was loaded without a header.
     */
    public function getColumnNames()
    {
        return $this->columnNames;
    }

    /**
     * Get the row at the current position.
     *
     * @return array|false|null The row keyed by column name when a header exists,
     *                          otherwise the plain list of values. When there is no
     *                          current row, the raw fgetcsv() result is returned.
     * @throws MalformedCsvException When the row's value count differs from the expected count.
     */
    public function current()
    {
        // Past the end of the file there is nothing to validate or combine.
        if (!is_array($this->currentLine)) {
            return $this->currentLine;
        }

        $valueCount = count($this->currentLine);

        if (!empty($this->columnNames)) {
            if (count($this->columnNames) !== $valueCount) {
                throw new MalformedCsvException(
                    sprintf(
                        'Line %s in %s has %s values where we expected %s.',
                        $this->currentLineNo,
                        $this->filePath,
                        $valueCount,
                        count($this->columnNames)
                    )
                );
            }

            return array_combine($this->columnNames, $this->currentLine);
        }

        if ($this->firstRowValueCount !== $valueCount) {
            throw new MalformedCsvException(
                sprintf(
                    'Line %s in %s has %s values where previous rows had %s.',
                    $this->currentLineNo,
                    $this->filePath,
                    $valueCount,
                    $this->firstRowValueCount
                )
            );
        }

        return $this->currentLine;
    }

    /**
     * Advance to the next row of the file.
     */
    public function next()
    {
        $this->currentLine = $this->loadCsvLine();
    }

    /**
     * Get the zero-based line number of the current row (the header, if any, is line 0).
     *
     * @return int|null
     */
    public function key()
    {
        return $this->currentLineNo;
    }

    /**
     * Whether the current position holds a row.
     *
     * @return bool
     */
    public function valid()
    {
        return $this->currentLine !== false;
    }

    /**
     * Move back to the first data row.
     *
     * The header line is skipped only when the file actually has one, so a
     * header-less file restarts at its very first line.
     */
    public function rewind()
    {
        $firstRowLineNo = $this->hasHeader ? 1 : 0;

        // The constructor already loaded the first data row; avoid a needless seek.
        if ($this->currentLineNo === $firstRowLineNo) {
            return;
        }

        rewind($this->fileHandle);
        $this->currentLineNo = null;

        if ($this->hasHeader) {
            $this->loadCsvLine(); // Skip the line containing headers.
        }

        $this->currentLine = $this->loadCsvLine();
    }

    /**
     * Read the next line from the file and advance the line counter.
     *
     * @return array|false|null The parsed values, or false when no line could be read.
     */
    protected function loadCsvLine()
    {
        if ($this->currentLineNo === null) {
            $this->currentLineNo = 0;
        } else {
            $this->currentLineNo++;
        }

        return fgetcsv($this->fileHandle, $this->lineLength, $this->delimiter, $this->enclosure, $this->escape);
    }
}
13 changes: 13 additions & 0 deletions src/Extract/Exception/ExceptionInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php
/**
* Author: Courtney Miles
* Date: 15/08/18
* Time: 11:30 PM
*/

namespace MilesAsylum\Slurp\Extract\Exception;

/**
 * Marker interface implemented by every exception raised from the Extract
 * namespace, allowing callers to catch them all with a single catch clause.
 *
 * Extending \Throwable guarantees that only real exceptions can implement it
 * and gives typed access to getMessage(), getCode(), etc. on a caught instance.
 */
interface ExceptionInterface extends \Throwable
{

}
13 changes: 13 additions & 0 deletions src/Extract/Exception/MalformedCsvException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?php
/**
* Author: Courtney Miles
* Date: 15/08/18
* Time: 11:30 PM
*/

namespace MilesAsylum\Slurp\Extract\Exception;

/**
 * Signals that a CSV source is structurally invalid, e.g. its header contains
 * duplicate column names or a row has an unexpected number of values.
 */
class MalformedCsvException extends \Exception implements ExceptionInterface
{

}
2 changes: 1 addition & 1 deletion src/Extract/ExtractorInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ interface ExtractorInterface extends \Iterator
* Get the defined columns for the source.
* @return array
*/
public function getColumns();
public function getColumnNames();
}
126 changes: 126 additions & 0 deletions tests/Slurp/Extract/CsvFileExtractorTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
<?php
/**
* Author: Courtney Miles
* Date: 15/08/18
* Time: 10:15 PM
*/

namespace MilesAsylum\Slurp\Tests\Slurp\Extract;

use MilesAsylum\Slurp\Extract\CsvFileExtractor;
use MilesAsylum\Slurp\Extract\Exception\MalformedCsvException;
use PHPUnit\Framework\TestCase;

/**
 * Exercises CsvFileExtractor against the CSV fixtures in the _fixture directory.
 */
class CsvFileExtractorTest extends TestCase
{
    /**
     * The header row is exposed as the list of column names.
     */
    public function testGetColumnNamesWithHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);
        $expectedNames = ['user', 'date', 'class', 'value'];

        $this->assertSame($expectedNames, $extractor->getColumnNames());
    }

    /**
     * No column names are reported when the file is loaded without a header.
     */
    public function testGetColumnNamesWithoutHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/without_headers.csv', false);

        $this->assertSame([], $extractor->getColumnNames());
    }

    /**
     * Rows are keyed by column name and line numbers start at 1.
     */
    public function testIterateFileWithHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);

        $firstRow = [
            'user' => 'user123',
            'date' => '2018-01-01',
            'class' => 'foo',
            'value' => '123.45'
        ];
        $secondRow = [
            'user' => 'user456',
            'date' => '2018-02-01',
            'class' => 'bar',
            'value' => '678.90'
        ];

        $this->assertSame(
            [1 => $firstRow, 2 => $secondRow],
            $this->iterateCsv($extractor)
        );
    }

    /**
     * A second pass over the same extractor yields the same rows as the first.
     *
     * @depends testIterateFileWithHeaders
     */
    public function testRewindIterator()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);

        $firstPass = $this->iterateCsv($extractor);
        $secondPass = $this->iterateCsv($extractor);

        $this->assertSame($firstPass, $secondPass);
    }

    /**
     * Rows are plain lists and line numbers start at 0.
     */
    public function testIterateFileWithoutHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/without_headers.csv', false);

        $expectedRows = [
            0 => ['user123', '2018-01-01', 'foo', '123.45'],
            1 => ['user456', '2018-02-01', 'bar', '678.90']
        ];

        $this->assertSame($expectedRows, $this->iterateCsv($extractor));
    }

    /**
     * A row with more values than there are column names is rejected.
     */
    public function testIterateFileWithMismatchColumnCount()
    {
        $fixture = __DIR__ . '/_fixture/mismatch_column_count.csv';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage(
            'Line 1 in ' . $fixture . ' has 5 values where we expected 4.'
        );

        $extractor = new CsvFileExtractor($fixture, true);

        $this->iterateCsv($extractor);
    }

    /**
     * Without a header, a row whose value count differs from the first row is rejected.
     */
    public function testIterateFileWithVaryingColumnCount()
    {
        $fixture = __DIR__ . '/_fixture/varying_column_count.csv';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage(
            'Line 1 in ' . $fixture . ' has 3 values where previous rows had 4.'
        );

        $extractor = new CsvFileExtractor($fixture, false);

        $this->iterateCsv($extractor);
    }

    /**
     * Duplicate names in the header row are rejected at construction time.
     */
    public function testConstructFileWithDuplicateColumnNames()
    {
        $fixture = __DIR__ . '/_fixture/duplicate_headers.csv';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage(
            'The loaded file ' . $fixture . ' contains duplicate column names.'
        );

        new CsvFileExtractor($fixture, true);
    }

    /**
     * Drain the extractor into an array keyed by line number.
     *
     * @param CsvFileExtractor $csv
     * @return array
     */
    protected function iterateCsv(CsvFileExtractor $csv)
    {
        return iterator_to_array($csv, true);
    }
}
3 changes: 3 additions & 0 deletions tests/Slurp/Extract/_fixture/duplicate_headers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
user,user,date,class,value
user123,user123,2018-01-01,foo,123.45
user456,user456,2018-02-01,bar,678.90
3 changes: 3 additions & 0 deletions tests/Slurp/Extract/_fixture/mismatch_column_count.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
user,date,class,value
user123,2018-01-01,foo,123.45,111
user456,2018-02-01,bar,678.90,222
2 changes: 2 additions & 0 deletions tests/Slurp/Extract/_fixture/varying_column_count.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
user123,2018-01-01,foo,123.45
user456,2018-02-01,bar
3 changes: 3 additions & 0 deletions tests/Slurp/Extract/_fixture/with_headers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
user,date,class,value
user123,2018-01-01,foo,123.45
user456,2018-02-01,bar,678.90
2 changes: 2 additions & 0 deletions tests/Slurp/Extract/_fixture/without_headers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
user123,2018-01-01,foo,123.45
user456,2018-02-01,bar,678.90

0 comments on commit 18bd080

Please sign in to comment.