-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* New "charData" data file built from unicode.org containing whitespace & confusable chars; confusable characters are now highlighted
* Refactor dumpers: break out object methods into their own new classes. New namespaces for base, text, & textAnsi
* Drop Utf8Dump utility class
* Array keys and object property names are now abstracted if they contain non-UTF-8 characters
* PDO / mysqli transaction methods no longer open / close a group; they now create an info logEntry
* trace() method - new limit param / params may be passed in any order
- Loading branch information
Showing
132 changed files
with
12,495 additions
and
3,100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
<?php | ||
|
||
namespace bdk\Debug\Dev; | ||
|
||
/**
 * Pull latest confusables.txt from unicode and save to php file
 *
 * Development utility: downloads / reads the confusables data file, keeps the
 * non-ASCII characters that are confusable with a single ASCII character,
 * merges them into the data returned by the `charData.php` file that sits
 * next to this class, and writes the result out as a PHP file returning an array.
 *
 * @psalm-import-type charInfo from \bdk\Debug\Plugin\CharHighlight
 */
class UpdateCharData
{
    /** @var string Location (URL or local path) passed to `file()` to read confusables.txt */
    public static $filepathSrc = 'https://www.unicode.org/Public/security/latest/confusables.txt';

    /** @var array<string, charInfo> (not referenced by any method of this class) */
    protected $charData = array();

    /**
     * Update charData.php
     *
     * Builds the character data, exports it as PHP source and writes it to
     * `../src/Debug/Dump/charData.php` (relative to this file's directory).
     *
     * @return void
     *
     * @throws \RuntimeException if the source cannot be read, or the output cannot be generated / written
     */
    public static function update()
    {
        $filepathOut = __DIR__ . '/../src/Debug/Dump/charData.php';
        // built line-by-line so the generated header does not depend on how this file is indented
        $comment = \implode("\n", array(
            '/**',
            ' * This file is generated automatically from confusables.txt',
            ' * https://www.unicode.org/Public/security/latest/confusables.txt',
            ' *',
            ' * `composer run update-char-data`',
            ' *',
            ' * @phpcs:disable SlevomatCodingStandard.Arrays.AlphabeticallySortedByKeys',
            ' */',
        ));
        $php = '<?php // phpcs:ignore SlevomatCodingStandard.Files.FileLength' . "\n\n"
            . $comment . "\n\n"
            . 'return ' . self::varExportPretty(self::build()) . ";\n";
        $php = self::escapeAsciiKeys($php);
        if (\file_put_contents($filepathOut, $php) === false) {
            throw new \RuntimeException('Unable to write ' . $filepathOut);
        }
    }

    /**
     * Build char data
     *
     * Starts from the array returned by `charData.php` (same directory as this class)
     * and adds one entry per confusable character that is not already present.
     * Existing entries are never overwritten.
     *
     * @return array<string, array<string, string|bool>>
     *
     * @throws \RuntimeException if the confusables source cannot be read
     */
    public static function build()
    {
        // only interested in chars that are confusable with an ascii char
        // not interested in ascii chars that are confusable with other ascii chars
        $rows = \array_filter(self::getParsedRows(), static function ($row) {
            return self::isSingleAscii($row['charA']) === false
                && self::isSingleAscii($row['charB']);
        });

        \usort($rows, static function ($rowA, $rowB) {
            return \strcmp($rowA['charA'], $rowB['charA']);
        });

        // rekey by character; entries already defined in charData.php take precedence
        $charData = require __DIR__ . '/charData.php';
        foreach ($rows as $row) {
            $key = $row['charA'];
            if (isset($charData[$key])) {
                continue;
            }
            $charData[$key] = array(
                'codePoint' => $row['charACodePoint'],
                'desc' => $row['charADesc'],
                'similarTo' => $row['charB'],
            );
        }

        \ksort($charData);

        return $charData;
    }

    /**
     * Return parsed data for all confusable data
     *
     * Blank lines and comment lines (first non-whitespace character is `#`) are skipped.
     * A UTF-8 byte-order-mark at the very start of the data is ignored.
     * Original line indexes are preserved as array keys.
     *
     * @return array<string, string|bool>[]
     *
     * @throws \RuntimeException if the source cannot be read
     */
    private static function getParsedRows()
    {
        $lines = \file(self::$filepathSrc);
        if ($lines === false) {
            throw new \RuntimeException('Unable to read ' . self::$filepathSrc);
        }
        if (isset($lines[0])) {
            // without this, a BOM-prefixed leading comment would not start with "#"
            $lines[0] = \preg_replace('/^\xEF\xBB\xBF/', '', $lines[0]);
        }

        $lines = \array_filter($lines, static function ($line) {
            $line = \ltrim($line);
            return $line !== '' && $line[0] !== '#';
        });

        return \array_map(static function ($line) {
            return self::parseRow($line);
        }, $lines);
    }

    /**
     * Parse confusable.txt row
     *
     * A row consists of three `;` delimited fields: source code point(s),
     * target code point(s), and a trailing comment holding the category and descriptions.
     * Whitespace (spaces / tabs) surrounding each field is ignored.
     *
     * @param string $row non-comment row from data file
     *
     * @return array<string,mixed> keys: charA, charACodePoint, charADesc, charB, isXid
     *
     * @throws \UnexpectedValueException if the row does not contain three fields
     */
    protected static function parseRow($row)
    {
        // split on the bare delimiter and trim, rather than relying on exact delimiter whitespace
        $fields = \array_map('trim', \explode(';', $row, 3));
        if (\count($fields) !== 3) {
            throw new \UnexpectedValueException('Unable to parse row: ' . \trim($row));
        }

        $charACodePoint = self::stripLeadingZeroPairs($fields[0]);
        $charBCodePoint = self::stripLeadingZeroPairs($fields[1]);

        // comment format:  category <tab> # [*] ( example ) descA → descB [# trailing comment]
        $matches = array();
        \preg_match('/^(?P<category>\w+)\t#(?P<notXid>\*?)\s*(?P<example>\(.*?\))\s*(?P<charADesc>.*?) → (?P<charBDesc>.*?)(\s+#.*)?$/u', $fields[2], $matches);

        return array(
            'charA' => self::codePointsToString($charACodePoint),
            'charACodePoint' => $charACodePoint,
            // null (without an undefined-key warning) when the comment could not be parsed
            'charADesc' => isset($matches['charADesc']) ? $matches['charADesc'] : null,

            'charB' => self::codePointsToString($charBCodePoint),
            // a "*" following the "#" flags the row as not-XID
            'isXid' => empty($matches['notXid']),
        );
    }

    /**
     * export value as valid php
     *
     * Same as `var_export()` but formatted as `array(` with nested arrays opened on the
     * same line as their key, 4-space indentation, and a lone NUL-byte string written as `"\x00"`.
     *
     * Note: re-indentation is line based; leading spaces on continuation lines of
     * multi-line string values are doubled as well.
     *
     * @param mixed $val Value to export
     *
     * @return string
     */
    protected static function varExportPretty($val)
    {
        $php = \var_export($val, true);
        $php = \str_replace('array (', 'array(', $php);
        $php = \preg_replace('/=> \n\s+array/', '=> array', $php);
        // double the leading spaces (2 per level -> 4 per level)
        // match spaces only: "\s" would also swallow (and duplicate) blank lines inside string values
        $php = \preg_replace('/^( +)/m', '$1$1', $php);
        $php = \str_replace('\'\' . "\0" . \'\'', '"\x00"', $php);
        return $php;
    }

    /**
     * Rewrite single-character ASCII array keys as double-quoted hex escapes (ie `"\x9" => `)
     *
     * Single-character non-ASCII keys are normalized to single quotes.
     *
     * @param string $php exported php source
     *
     * @return string
     *
     * @throws \RuntimeException on PCRE failure (ie invalid UTF-8)
     */
    private static function escapeAsciiKeys($php)
    {
        $escaped = \preg_replace_callback('/[\'"](.)[\'"] => /u', static function ($matches) {
            $char = $matches[1];
            $codePoint = \mb_ord($char, 'UTF-8');
            return $codePoint < 0x80
                ? '"\\x' . \dechex($codePoint) . '" => '
                : '\'' . $char . '\' => ';
        }, $php);
        if ($escaped === null) {
            // don't let a regex failure result in an empty data file being written
            throw new \RuntimeException('Unable to escape array keys (preg error ' . \preg_last_error() . ')');
        }
        return $escaped;
    }

    /**
     * Is the string exactly one ASCII (7-bit) character?
     *
     * @param string $str string to test
     *
     * @return bool
     */
    private static function isSingleAscii($str)
    {
        return \strlen($str) === 1 && \ord($str) < 0x80;
    }

    /**
     * Remove leading "00" pairs from each space-separated hex code point
     *
     * @param string $hexSequence one or more space-separated hex values
     *
     * @return string
     */
    private static function stripLeadingZeroPairs($hexSequence)
    {
        $hexValues = \array_map(static function ($hex) {
            return \preg_replace('/^(00)+/', '', $hex);
        }, \explode(' ', $hexSequence));
        return \implode(' ', $hexValues);
    }

    /**
     * Convert space-separated hex code points to the UTF-8 string they represent
     *
     * @param string $hexSequence one or more space-separated hex values
     *
     * @return string
     */
    private static function codePointsToString($hexSequence)
    {
        $chars = \array_map(static function ($hex) {
            return \mb_chr((int) \hexdec($hex), 'UTF-8');
        }, \explode(' ', $hexSequence));
        return \implode('', $chars);
    }
}
Oops, something went wrong.