Skip to content

Commit

Permalink
[ENHANCEMENT] - Replace unicode codepoints without regex (#38)
Browse files Browse the repository at this point in the history
  • Loading branch information
joshmcrae committed Mar 28, 2023
1 parent f13cf10 commit 97a3e63
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 3,266 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
timeout-minutes: 10
strategy:
matrix:
php-versions: [ '7.3', '7.4', '8.0', '8.1' ]
php-versions: [ '7.4', '8.0', '8.1', '8.2' ]
name: PHP ${{ matrix.php-versions }}

steps:
Expand Down
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright (C) Elvanto Pty Ltd <developers@elvanto.com>
Copyright (C) 2017-2023 Elvanto Pty Ltd <developers@elvanto.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand All @@ -16,4 +16,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ echo LitEmoji::encodeShortcode('📱');
// ':iphone:'
```

## Encodings

LitEmoji's various functions will do their best to detect the encoding of the
provided text and should work on UTF-8 encoded strings without issue. In cases
where the encoding cannot be detected, UTF-8 is assumed; however, a second argument
can be provided to any of the functions to hint at the actual encoding of the
provided string.

## Contributing

Pull requests are welcome. New code must be fully unit tested (the existing
Expand Down
33 changes: 18 additions & 15 deletions bin/generate-shortcodes-array.php
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
#!/usr/bin/env php
<?php

function normalizeShortcode($shortcode) {
function normalizeShortcode($shortcode)
{
return str_replace('-', '_', strtolower($shortcode));
}

// Collect available emoji
$data = json_decode(file_get_contents(__DIR__ . '/../vendor/milesj/emojibase/packages/data/en/data.raw.json'), true);
$shortcodes = json_decode(file_get_contents(__DIR__ . '/../vendor/milesj/emojibase/packages/data/en/shortcodes/emojibase.raw.json'), true);

$emoji_array = require(__DIR__ . '/../src/shortcodes-array.php');
$existing_shortcodes = array_map('normalizeShortcode', array_keys($emoji_array));
$emojiList = require(__DIR__ . '/../src/emoji.php');
$existingShortcodes = array_map('normalizeShortcode', array_keys($emojiList));

foreach ($data as $emoji) {

if (
!isset($shortcodes[$emoji['hexcode']]) ||
!array_key_exists('group', $emoji) // Excludes regional indicator emoji that mess with flags
!isset($shortcodes[$emoji['hexcode']]) ||
!array_key_exists('group', $emoji) // Excludes regional indicator emoji that mess with flags
) {
continue;
}
Expand All @@ -25,19 +26,21 @@ function normalizeShortcode($shortcode) {
}

foreach ($shortcodes[$emoji['hexcode']] as $shortcode) {

if (in_array(normalizeShortcode($shortcode), $existing_shortcodes)) {
if (in_array(normalizeShortcode($shortcode), $existingShortcodes)) {
continue;
}

$emoji_array[ (string) $shortcode] = $emoji['hexcode'];
$emojiList[(string)$shortcode] = $emoji['hexcode'];
}
}

ksort($emoji_array, SORT_NATURAL);
$output = "<?php\nreturn [\n";
foreach ($emoji_array as $shortcode => $codepoints) {
$output .= " '$shortcode' => '$codepoints',\n";
// Order by longest codepoint to ensure replacement of ZWJ emoji first
uasort($emojiList, fn ($a, $b) => strlen($b) <=> strlen($a));

// Generate cachable PHP code
$output = [];
foreach ($emojiList as $shortcode => $codepoints) {
$output[] = sprintf("'%s'=>'%s'", $shortcode, $codepoints);
};
$output .= '];';
file_put_contents('src/shortcodes-array.php', $output);

file_put_contents('src/emoji.php', sprintf('<?php return [%s];', implode(',', $output)));
114 changes: 0 additions & 114 deletions bin/generate-unicode-regex.php

This file was deleted.

5 changes: 2 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"keywords": ["php-emoji", "emoji"],
"license": "MIT",
"require": {
"php": ">=7.3",
"php": ">=7.4",
"ext-mbstring": "*"
},
"require-dev": {
Expand All @@ -31,8 +31,7 @@
},
"scripts": {
"update-resources": [
"@php bin/generate-shortcodes-array.php",
"@php bin/generate-unicode-regex.php"
"@php bin/generate-shortcodes-array.php"
]
}
}
Loading

0 comments on commit 97a3e63

Please sign in to comment.