-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 10 changed files with 335 additions and 1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
<?php | ||
/** | ||
* Author: Courtney Miles | ||
* Date: 15/08/18 | ||
* Time: 7:08 PM | ||
*/ | ||
|
||
namespace MilesAsylum\Slurp\Extract; | ||
|
||
|
||
use MilesAsylum\Slurp\Extract\Exception\MalformedCsvException; | ||
|
||
/**
 * Reads a CSV file one row at a time through iterator-style methods
 * (current/next/key/valid/rewind).
 *
 * When the file has a header row, each row is returned as an associative
 * array keyed by column name and keys start at 1 (the header is line 0).
 * Without a header row, each row is returned as a plain list of values and
 * keys start at 0.
 *
 * Every row must have the same number of values as the header (or, without a
 * header, as the first row); otherwise current() throws MalformedCsvException.
 */
class CsvFileExtractor implements ExtractorInterface
{
    /**
     * @var string Path of the CSV file being read.
     */
    protected $filePath;

    /**
     * @var bool|resource Handle returned by fopen() for the CSV file.
     */
    protected $fileHandle;

    /**
     * @var bool Whether the first line of the file is a header row.
     */
    protected $hasHeader = false;

    /**
     * @var array Column names taken from the header row; empty without a header.
     */
    protected $columnNames = [];

    /**
     * @var int Number of values found in the first data row.
     */
    protected $firstRowValueCount;

    /**
     * @var int
     */
    private $lineLength;

    /**
     * @var string
     */
    private $delimiter;

    /**
     * @var string
     */
    private $enclosure;

    /**
     * @var string
     */
    private $escape;

    /**
     * @var array|false|null The most recently loaded row; false once the end of file is reached.
     */
    protected $currentLine;

    /**
     * @var int|null Zero-based number of the most recently loaded line; null before any load.
     */
    protected $currentLineNo = null;

    /**
     * Opens the file, reads the optional header row and loads the first data row.
     *
     * @param string $filePath Path of the CSV file to read.
     * @param bool $hasHeader True when the first line holds the column names.
     * @param int $lineLength Passed to fgetcsv() as the maximum line length.
     * @param string $delimiter Field delimiter passed to fgetcsv().
     * @param string $enclosure Field enclosure character passed to fgetcsv().
     * @param string $escape Escape character passed to fgetcsv().
     * @throws MalformedCsvException When the header row is missing or has duplicate names.
     * @throws \RuntimeException When the file cannot be opened for reading.
     */
    public function __construct(
        $filePath,
        $hasHeader,
        $lineLength = 0,
        $delimiter = ',',
        $enclosure = '"',
        $escape = '\\'
    ) {
        $this->filePath = $filePath;
        $this->hasHeader = (bool) $hasHeader;
        $this->lineLength = $lineLength;
        $this->delimiter = $delimiter;
        $this->enclosure = $enclosure;
        $this->escape = $escape;

        $this->fileHandle = fopen($this->filePath, "r");

        // Fail early with a clear message instead of passing false to fgetcsv().
        if ($this->fileHandle === false) {
            throw new \RuntimeException(
                "The file {$this->filePath} could not be opened for reading."
            );
        }

        if ($this->hasHeader) {
            $header = $this->loadCsvLine();

            // fgetcsv() yields false at end of file, i.e. the file has no header line at all.
            if (!is_array($header)) {
                throw new MalformedCsvException(
                    "The loaded file {$this->filePath} is empty; a header row was expected."
                );
            }

            $this->columnNames = $header;

            if (count($this->columnNames) !== count(array_unique($this->columnNames))) {
                throw new MalformedCsvException(
                    "The loaded file {$this->filePath} contains duplicate column names."
                );
            }
        }

        $this->currentLine = $this->loadCsvLine();
        // A file without data rows leaves currentLine as false, which must not be counted.
        $this->firstRowValueCount = is_array($this->currentLine) ? count($this->currentLine) : 0;
    }

    /**
     * Releases the file handle when the extractor is destroyed.
     */
    public function __destruct()
    {
        if (is_resource($this->fileHandle)) {
            fclose($this->fileHandle);
        }
    }

    /**
     * @return array The column names read from the header row, or an empty array without a header.
     */
    public function getColumnNames()
    {
        return $this->columnNames;
    }

    /**
     * Returns the current row, validating its value count first.
     *
     * @return array|false|null The row keyed by column name (with a header) or as a list
     *                          (without); the raw non-array value when no row is loaded.
     * @throws MalformedCsvException When the row's value count differs from the expected count.
     */
    public function current()
    {
        // Past the end of the file there is no row to validate or combine.
        if (!is_array($this->currentLine)) {
            return $this->currentLine;
        }

        $valueCount = count($this->currentLine);

        if (!empty($this->columnNames)) {
            if (count($this->columnNames) !== $valueCount) {
                throw new MalformedCsvException(
                    sprintf(
                        'Line %s in %s has %s values where we expected %s.',
                        $this->currentLineNo,
                        $this->filePath,
                        $valueCount,
                        count($this->columnNames)
                    )
                );
            }

            return array_combine($this->columnNames, $this->currentLine);
        }

        if ($this->firstRowValueCount !== $valueCount) {
            throw new MalformedCsvException(
                sprintf(
                    'Line %s in %s has %s values where previous rows had %s.',
                    $this->currentLineNo,
                    $this->filePath,
                    $valueCount,
                    $this->firstRowValueCount
                )
            );
        }

        return $this->currentLine;
    }

    /**
     * Advances to the next row of the file.
     */
    public function next()
    {
        $this->currentLine = $this->loadCsvLine();
    }

    /**
     * @return int|null The zero-based line number of the current row.
     */
    public function key()
    {
        return $this->currentLineNo;
    }

    /**
     * @return bool False once the end of the file has been reached.
     */
    public function valid()
    {
        return $this->currentLine !== false;
    }

    /**
     * Repositions the extractor on the first data row.
     *
     * The header line is skipped only when the file actually has one, so a
     * header-less file restarts on its true first row with key 0.
     */
    public function rewind()
    {
        // Line number the first data row carries: 1 after a header line, otherwise 0.
        $firstDataLineNo = $this->hasHeader ? 1 : 0;

        if ($this->currentLineNo === $firstDataLineNo) {
            return; // Already positioned on the first data row; nothing to re-read.
        }

        $this->currentLineNo = null;
        rewind($this->fileHandle);

        if ($this->hasHeader) {
            $this->loadCsvLine(); // Skip the line containing headers.
        }

        $this->currentLine = $this->loadCsvLine();
    }

    /**
     * Reads the next line from the file and advances the line counter.
     *
     * The counter becomes 0 for the first line read after a reset and is
     * incremented on every later read, including the read that hits end of file.
     *
     * @return array|false|null The parsed values as returned by fgetcsv().
     */
    protected function loadCsvLine()
    {
        if ($this->currentLineNo === null) {
            $this->currentLineNo = 0;
        } else {
            $this->currentLineNo++;
        }

        return fgetcsv($this->fileHandle, $this->lineLength, $this->delimiter, $this->enclosure, $this->escape);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<?php | ||
/** | ||
* Author: Courtney Miles | ||
* Date: 15/08/18 | ||
* Time: 11:30 PM | ||
*/ | ||
|
||
namespace MilesAsylum\Slurp\Extract\Exception; | ||
|
||
/**
 * Marker interface implemented by the exceptions declared in this namespace.
 *
 * It declares no methods.
 */
interface ExceptionInterface
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<?php | ||
/** | ||
* Author: Courtney Miles | ||
* Date: 15/08/18 | ||
* Time: 11:30 PM | ||
*/ | ||
|
||
namespace MilesAsylum\Slurp\Extract\Exception; | ||
|
||
/**
 * Exception type for CSV content that does not have the expected structure.
 *
 * Adds nothing to \Exception beyond the ExceptionInterface marker.
 */
class MalformedCsvException extends \Exception implements ExceptionInterface
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
<?php | ||
/** | ||
* Author: Courtney Miles | ||
* Date: 15/08/18 | ||
* Time: 10:15 PM | ||
*/ | ||
|
||
namespace MilesAsylum\Slurp\Tests\Slurp\Extract; | ||
|
||
use MilesAsylum\Slurp\Extract\CsvFileExtractor; | ||
use MilesAsylum\Slurp\Extract\Exception\MalformedCsvException; | ||
use PHPUnit\Framework\TestCase; | ||
|
||
/**
 * Exercises CsvFileExtractor against the CSV files in the _fixture directory.
 */
class CsvFileExtractorTest extends TestCase
{
    /**
     * The header row is exposed as the column names.
     */
    public function testGetColumnNamesWithHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);
        $expectedNames = ['user','date','class','value'];

        $this->assertSame($expectedNames, $extractor->getColumnNames());
    }

    /**
     * Without a header row there are no column names.
     */
    public function testGetColumnNamesWithoutHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/without_headers.csv', false);
        $expectedNames = [];

        $this->assertSame($expectedNames, $extractor->getColumnNames());
    }

    /**
     * Rows of a file with headers are keyed by column name, numbered from 1.
     */
    public function testIterateFileWithHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);

        $firstRow = [
            'user' => 'user123',
            'date' => '2018-01-01',
            'class' => 'foo',
            'value' => '123.45'
        ];
        $secondRow = [
            'user' => 'user456',
            'date' => '2018-02-01',
            'class' => 'bar',
            'value' => '678.90'
        ];
        $expectedRows = [1 => $firstRow, 2 => $secondRow];

        $this->assertSame($expectedRows, $this->iterateCsv($extractor));
    }

    /**
     * Iterating the same extractor twice yields the same rows both times.
     *
     * @depends testIterateFileWithHeaders
     */
    public function testRewindIterator()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/with_headers.csv', true);

        $firstPass = $this->iterateCsv($extractor);
        $secondPass = $this->iterateCsv($extractor);

        $this->assertSame($firstPass, $secondPass);
    }

    /**
     * Rows of a file without headers are plain lists, numbered from 0.
     */
    public function testIterateFileWithoutHeaders()
    {
        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/without_headers.csv', false);

        $expectedRows = [
            0 => ['user123', '2018-01-01', 'foo', '123.45'],
            1 => ['user456', '2018-02-01', 'bar', '678.90']
        ];

        $this->assertSame($expectedRows, $this->iterateCsv($extractor));
    }

    /**
     * A row with more values than the header has columns is rejected.
     */
    public function testIterateFileWithMismatchColumnCount()
    {
        $expectedMessage =
            'Line 1 in ' . __DIR__ . '/_fixture/mismatch_column_count.csv has 5 values where we expected 4.';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage($expectedMessage);

        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/mismatch_column_count.csv', true);

        $this->iterateCsv($extractor);
    }

    /**
     * In a header-less file, a row whose value count differs from the first row is rejected.
     */
    public function testIterateFileWithVaryingColumnCount()
    {
        $expectedMessage =
            'Line 1 in ' . __DIR__ . '/_fixture/varying_column_count.csv has 3 values where previous rows had 4.';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage($expectedMessage);

        $extractor = new CsvFileExtractor(__DIR__ . '/_fixture/varying_column_count.csv', false);

        $this->iterateCsv($extractor);
    }

    /**
     * Duplicate names in the header row are rejected at construction time.
     */
    public function testConstructFileWithDuplicateColumnNames()
    {
        $expectedMessage =
            'The loaded file ' . __DIR__ . '/_fixture/duplicate_headers.csv contains duplicate column names.';

        $this->expectException(MalformedCsvException::class);
        $this->expectExceptionMessage($expectedMessage);

        new CsvFileExtractor(__DIR__ . '/_fixture/duplicate_headers.csv', true);
    }

    /**
     * Walks the extractor with foreach and collects every row under its key.
     *
     * @param CsvFileExtractor $csv
     * @return array Rows indexed by the key the extractor reported for them.
     */
    protected function iterateCsv(CsvFileExtractor $csv)
    {
        $collected = [];

        foreach ($csv as $lineNo => $row) {
            $collected[$lineNo] = $row;
        }

        return $collected;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
user,user,date,class,value | ||
user123,user123,2018-01-01,foo,123.45 | ||
user456,user456,2018-02-01,bar,678.90 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
user,date,class,value | ||
user123,2018-01-01,foo,123.45,111 | ||
user456,2018-02-01,bar,678.90,222 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
user123,2018-01-01,foo,123.45 | ||
user456,2018-02-01,bar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
user,date,class,value | ||
user123,2018-01-01,foo,123.45 | ||
user456,2018-02-01,bar,678.90 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
user123,2018-01-01,foo,123.45 | ||
user456,2018-02-01,bar,678.90 |