Skip to content

Commit

Permalink
Update namespace and simplify code
Browse files Browse the repository at this point in the history
  • Loading branch information
nyamsprod committed Dec 10, 2023
1 parent 52d6a4e commit 0118336
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 82 deletions.
44 changes: 22 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
tabular data represented as HTML Table. Once installed you will be able to do the following:

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$table = Parser::new()
->tableHeader(['rank', 'move', 'team', 'player', 'won', 'drawn', 'lost', 'for', 'against', 'gd', 'points'])
Expand Down Expand Up @@ -57,7 +57,7 @@ for more information.
**The `Parser` constructor is private; to instantiate the object you are required to use the `new` method instead.**

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()
->ignoreTableHeader()
Expand All @@ -72,7 +72,7 @@ To extract and parse your table use either the `parseHtml` or `parseFile` method
If parsing is not possible, a `ParserError` exception will be thrown.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new();

Expand All @@ -97,7 +97,7 @@ Both methods return a `Table` instance which implements the `League\Csv\TabularD
interface and also gives access to the table caption if present via the `getCaption` method.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$html = <<<HTML
<div>
Expand Down Expand Up @@ -161,7 +161,7 @@ favor `Parser::tableXpathPosition` which expects an `xpath` expression.
If the expression is valid, and a list of tables is found, the first result will be returned.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()->tablePosition('table-id'); // parses the <table id='table-id'>
$parser = Parser::new()->tablePosition(3); // parses the 4th table of the page
Expand All @@ -177,7 +177,7 @@ recommended to use one or the other but not both at the same time.**
You can optionally define a caption for your table if none is present or found during parsing.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()->tableCaption('this is a generated caption');
$parser = Parser::new()->tableCaption(null); // remove any default caption set
Expand All @@ -194,18 +194,18 @@ But you can override this behaviour using one of these settings:
Tells where to locate and resolve the table header

```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
use Bakame\TabularData\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Section;

$parser = Parser::new()->tableHeaderPosition(Section::thead, 3);
$parser = Parser::new()->tableHeaderPosition(Section::Thead, 3);
// header is the 4th row in the <thead> table section
```

The method uses the `Bakame\HtmlTable\Section` enum to designate which table section to use
The method uses the `Bakame\TabularData\HtmlTable\Section` enum to designate which table section to use
to resolve the header

```php
use Bakame\HtmlTable\Section;
use Bakame\TabularData\HtmlTable\Section;

enum Section
{
Expand All @@ -225,7 +225,7 @@ Instructs the parser to resolve or not the table header using `tableHeaderPositi
If no resolution is done, no header will be included in the returned `Table` instance.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()->ignoreTableHeader(); // no table header will be resolved
$parser = Parser::new()->resolveTableHeader(); // will attempt to resolve the table header
Expand All @@ -237,8 +237,8 @@ You can specify directly the header of your table and override any other table h
related configuration with this configuration

```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
use Bakame\TabularData\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Section;

$parser = Parser::new()->tableHeader(['rank', 'team', 'winner']);
```
Expand All @@ -251,8 +251,8 @@ You can skip or re-arrange the source columns by skipping them by their offsets
re-ordering the offsets.

```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
use Bakame\TabularData\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Section;

$parser = Parser::new()->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team']);
// only 3 columns will be extracted: the 4th, 6th and 8th columns
Expand All @@ -265,11 +265,11 @@ $parser = Parser::new()->tableHeader([3 => 'rank', 7 => 'winner', 5 => 'team'])
Tells which section should be parsed based on the `Section` enum

```php
use Bakame\HtmlTable\Parser;
use Bakame\HtmlTable\Section;
use Bakame\TabularData\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Section;

$parser = Parser::new()->includeSection(Section::tbody); // thead and tfoot are included during parsing
$parser = Parser::new()->excludeSection(Section::tr, Section::tfoot); // table direct tr children and tfoot are not included during parsing
$parser = Parser::new()->includeSection(Section::Tbody); // thead and tfoot are included during parsing
$parser = Parser::new()->excludeSection(Section::Tr, Section::Tfoot); // table direct tr children and tfoot are not included during parsing
```

**By default, the `thead` section is not parsed. If a `thead` row is selected to be the header, it will
Expand All @@ -292,7 +292,7 @@ Adds or remove a record formatter applied to the data extracted from the table b
can access it. The header is not affected by the formatter if it is defined.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()->withFormatter($formatter); // attach a formatter to the parser
$parser = Parser::new()->withoutFormatter(); // removes the attached formatter if it exists
Expand Down Expand Up @@ -331,7 +331,7 @@ $formatter = function (array $record): array {
Tells whether the parser should ignore or throw in case of malformed HTML content.

```php
use Bakame\HtmlTable\Parser;
use Bakame\TabularData\HtmlTable\Parser;

$parser = Parser::new()->ignoreXmlErrors(); // ignore the XML errors
$parser = Parser::new()->failOnXmlErrors(3); // throw on XML errors
Expand Down
96 changes: 57 additions & 39 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

declare(strict_types=1);

namespace Bakame\HtmlTable;
namespace Bakame\TabularData\HtmlTable;

use ArrayIterator;
use Bakame\Aide\Error\Cloak;
use Closure;
use DOMDocument;
use DOMElement;
Expand All @@ -24,16 +25,12 @@
use function array_shift;
use function array_unique;
use function fclose;
use function fopen;
use function in_array;
use function is_resource;
use function libxml_clear_errors;
use function libxml_get_errors;
use function libxml_use_internal_errors;
use function preg_match;
use function restore_error_handler;
use function set_error_handler;
use function stream_get_contents;
use function strtolower;

final class Parser
Expand All @@ -43,7 +40,7 @@ final class Parser

/**
* @param array<string> $tableHeader
* @param array<string, int> $includedSections
* @param array<Section> $includedSections
*/
private function __construct(
private readonly string $tableExpression,
Expand All @@ -65,18 +62,23 @@ public static function new(): self
[],
false,
'(//table/thead/tr)[1]',
[Section::tbody->value => 1, Section::tr->value => 1, Section::tfoot->value => 1],
[Section::Tbody, Section::Tfoot, Section::Tr],
null,
false,
);
}

public function tableXPathPosition(string $expression): self
{
set_error_handler(fn (int $errno, string $errstr, string $errfile, int $errline) => true);
$newInstace = match (true) {
$query = (new DOMXPath(new DOMDocument()))->query(...);
$domXPath = Cloak::warning($query);

return match (true) {
$expression === $this->tableExpression => $this,
false === (new DOMXPath(new DOMDocument()))->query($expression) => throw new ParserError('The xpath expression `'.$expression.'` is invalie.'),
false === $domXPath($expression) => throw new ParserError(
message: 'The xpath expression `'.$expression.'` is invalid.',
previous: $domXPath->errors()->last()
),
default => new self(
$expression,
$this->caption,
Expand All @@ -88,9 +90,6 @@ public function tableXPathPosition(string $expression): self
$this->throwOnXmlErrors,
),
};
restore_error_handler();

return $newInstace;
}

/**
Expand Down Expand Up @@ -188,17 +187,28 @@ public function tableHeaderPosition(Section $section, int $offset = 0): self
};
}

public function includeAllSections(): self
{
return $this->includeSection(...Section::cases());
}

public function excludeAllSections(): self
{
return $this->excludeSection(...Section::cases());
}

public function includeSection(Section ...$sections): self
{
$includedSections = array_reduce(
$sections,
function (array $carry, Section $section) {
$carry[$section->value] = 1;
$current = [];
foreach ($this->includedSections as $section) {
$current[$section->value] = $section;
}
foreach ($sections as $section) {
$current[$section->value] = $section;
}

return $carry;
},
$this->includedSections
);
ksort($current);
$includedSections = array_values($current);

return match ($this->includedSections) {
$includedSections => $this,
Expand All @@ -217,15 +227,17 @@ function (array $carry, Section $section) {

public function excludeSection(Section ...$sections): self
{
$includedSections = array_reduce(
$sections,
function (array $carry, Section $section) {
unset($carry[$section->value]);
$current = [];
foreach ($this->includedSections as $section) {
$current[$section->value] = $section;
}

return $carry;
},
$this->includedSections
);
foreach ($sections as $section) {
if (array_key_exists($section->value, $current)) {
unset($current[$section->value]);
}
}
$includedSections = array_values($current);

return match ($this->includedSections) {
$includedSections => $this,
Expand Down Expand Up @@ -337,15 +349,17 @@ public function parseFile(mixed $filenameOrStream, $filenameContext = null): Tab
return $this->parseHtml($this->streamToString($filenameOrStream));
}

set_error_handler(fn (int $errno, string $errstr, string $errfile, int $errline) => true);
$resource = fopen(...match ($filenameContext) {
$fopen = Cloak::warning(fopen(...));
$resource = $fopen(...match ($filenameContext) {
null => [$filenameOrStream, 'r'],
default => [$filenameOrStream, 'r', false, $filenameContext],
});
restore_error_handler();

if (!is_resource($resource)) {
throw new ParserError('`'.$filenameOrStream.'`: failed to open stream: No such file or directory.');
throw new ParserError(
message: '`'.$filenameOrStream.'`: failed to open stream: No such file or directory.',
previous: $fopen->errors()->last()
);
}

$html = $this->streamToString($resource);
Expand Down Expand Up @@ -393,12 +407,12 @@ public function parseHtml(DOMDocument|DOMElement|SimpleXMLElement|Stringable|str
*/
private function streamToString($stream): string
{
set_error_handler(fn (int $errno, string $errstr, string $errfile, int $errline) => true);
$html = stream_get_contents($stream);
restore_error_handler();
$stream_get_contents = Cloak::warning(stream_get_contents(...));
/** @var string|false $html */
$html = $stream_get_contents($stream);

return match (false) {
$html => throw new ParserError('The resource could not be read.'),
$html => throw new ParserError('The resource could not be read.', 0, $stream_get_contents->errors()->last()),
default => $html,
};
}
Expand Down Expand Up @@ -474,7 +488,7 @@ private function extractTableContents(DOMXPath $xpath, array $header): Iterator
continue;
}

if (Section::tr === $section && null !== ($record = $this->filterRecord($childNode))) {
if (Section::Tr === $section && null !== ($record = $this->filterRecord($childNode))) {
$iterator->append($this->formatRecord($this->extractRecord($record, $rowSpan), $header));
continue;
}
Expand All @@ -492,7 +506,11 @@ private function extractTableContents(DOMXPath $xpath, array $header): Iterator

private function isIncludedSection(?Section $nodeName): bool
{
return array_key_exists($nodeName?->value ?? '', $this->includedSections);
if (null === $nodeName) {
return false;
}

return in_array($nodeName, $this->includedSections, true);
}

private function filterRecord(DOMNode $tr): ?DOMElement
Expand Down
2 changes: 1 addition & 1 deletion src/ParserError.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

declare(strict_types=1);

namespace Bakame\HtmlTable;
namespace Bakame\TabularData\HtmlTable;

use InvalidArgumentException;
use LibXMLError;
Expand Down
2 changes: 1 addition & 1 deletion src/ParserErrorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

declare(strict_types=1);

namespace Bakame\HtmlTable;
namespace Bakame\TabularData\HtmlTable;

use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;
Expand Down

0 comments on commit 0118336

Please sign in to comment.