Skip to content

Commit

Permalink
Implement LitEmoji::removeEmoji() (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
KarelWintersky committed Nov 18, 2021
1 parent e70a2b7 commit b48e23c
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 12 deletions.
13 changes: 13 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
* text=auto
* eol=lf

.git export-ignore
.gitattributes export-ignore
.gitignore export-ignore
.github export-ignore
tests export-ignore
vendor export-ignore
phpunit.xml export-ignore
bin export-ignore


4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ echo LitEmoji::encodeHtml('Baby you light my :fire:! :smiley:');

echo LitEmoji::encodeUnicode('Baby you light my :fire:! :smiley:');
// 'Baby you light my 🔥! 😃'

echo LitEmoji::removeEmoji('Baby you light my 🔥! 😃!!!');
// 'Baby you light my ! !!!'

```

# Configuration
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"keywords": ["php-emoji", "emoji"],
"license": "MIT",
"require": {
"php": ">=7.3"
"php": ">=7.3",
"ext-mbstring": "*"
},
"require-dev": {
"phpunit/phpunit": "9.4.*",
Expand Down
36 changes: 25 additions & 11 deletions src/LitEmoji.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class LitEmoji
{
const MB_REGEX = '/(
public const MB_REGEX = '/(
\x23\xE2\x83\xA3 # Digits
[\x30-\x39]\xE2\x83\xA3
| \xE2[\x9C-\x9E][\x80-\xBF] # Dingbats
Expand All @@ -28,7 +28,7 @@ class LitEmoji
* @param string $content
* @return string
*/
public static function encodeShortcode($content)
public static function encodeShortcode(string $content): string
{
$content = self::entitiesToUnicode($content);
$content = self::unicodeToShortcode($content);
Expand All @@ -42,7 +42,7 @@ public static function encodeShortcode($content)
* @param string $content
* @return string
*/
public static function encodeHtml($content)
public static function encodeHtml(string $content): string
{
$content = self::unicodeToShortcode($content);
$content = self::shortcodeToEntities($content);
Expand All @@ -56,7 +56,7 @@ public static function encodeHtml($content)
* @param string $content
* @return string
*/
public static function encodeUnicode($content)
public static function encodeUnicode(string $content): string
{
$content = self::shortcodeToUnicode($content);
$content = self::entitiesToUnicode($content);
Expand All @@ -70,7 +70,7 @@ public static function encodeUnicode($content)
* @param string $content
* @return string
*/
public static function shortcodeToUnicode($content)
public static function shortcodeToUnicode(string $content): string
{
$replacements = self::getShortcodeCodepoints();
return str_replace(array_keys($replacements), $replacements, $content);
Expand All @@ -82,10 +82,10 @@ public static function shortcodeToUnicode($content)
* @param string $content
* @return string
*/
public static function entitiesToUnicode($content)
public static function entitiesToUnicode(string $content): string
{
/* Convert HTML entities to uppercase hexadecimal */
$content = preg_replace_callback('/\&\#(x?[a-zA-Z0-9]*?)\;/', function($matches) {
$content = preg_replace_callback('/\&\#(x?[a-zA-Z0-9]*?)\;/', static function($matches) {
$code = $matches[1];

if ($code[0] == 'x') {
Expand All @@ -105,7 +105,7 @@ public static function entitiesToUnicode($content)
* @param string $content
* @return string
*/
public static function unicodeToShortcode($content)
public static function unicodeToShortcode(string $content): string
{
$replacement = '';
$encoding = mb_detect_encoding($content);
Expand Down Expand Up @@ -155,7 +155,8 @@ public static function unicodeToShortcode($content)
* @param string $content
* @return string
*/
public static function shortcodeToEntities($content) {
public static function shortcodeToEntities(string $content): string
{
    // Map of shortcode text => replacement text, as supplied by getShortcodeEntities().
    $entityMap = self::getShortcodeEntities();
    $shortcodes = array_keys($entityMap);

    // Every occurrence of each shortcode is swapped for its paired replacement.
    return str_replace($shortcodes, $entityMap, $content);
}
Expand All @@ -166,7 +167,7 @@ public static function shortcodeToEntities($content) {
* @param string $property
* @param mixed $value
*/
public static function config($property, $value)
public static function config(string $property, $value): void
{
switch ($property) {
case 'excludeShortcodes':
Expand All @@ -187,6 +188,19 @@ public static function config($property, $value)
break;
}
}

/**
 * Removes all emoji sequences from a string.
 *
 * The input is first normalised with encodeShortcode(), which converts HTML
 * entities to Unicode and Unicode emoji to ":shortcode:" text. Every known
 * shortcode is then deleted from the result.
 *
 * Only shortcodes present in the shortcode map are removed. Colon-delimited
 * text that is not an emoji (for example the ":30:" in "10:30:45") is left
 * untouched, and shortcodes containing characters outside \w are removed too.
 *
 * A literal shortcode typed in $source (e.g. ":fire:") is also removed.
 * Whitespace around a removed emoji is preserved.
 *
 * @param string $source
 * @return string
 */
public static function removeEmoji(string $source): string
{
    $content = self::encodeShortcode($source);

    // Same colon-delimited search keys that shortcodeToUnicode() replaces.
    // NOTE(review): assumes every shortcode emitted by encodeShortcode() is a
    // key of getShortcodeCodepoints() - confirm against unicodeToShortcode().
    $shortcodes = array_keys(self::getShortcodeCodepoints());

    // str_replace() always returns a string here, unlike preg_replace(),
    // which may return null on a PCRE error.
    return str_replace($shortcodes, '', $content);
}

private static function getShortcodes()
{
Expand All @@ -195,7 +209,7 @@ private static function getShortcodes()
}

// Skip excluded shortcodes
self::$shortcodes = array_filter(require(__DIR__ . '/shortcodes-array.php'), function($code) {
self::$shortcodes = array_filter(require(__DIR__ . '/shortcodes-array.php'), static function($code) {
return !in_array($code, self::$excludedShortcodes);
}, ARRAY_FILTER_USE_KEY);

Expand Down
6 changes: 6 additions & 0 deletions tests/LitEmojiTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ public function testUnicodeToShortcodeTiming()
$text = LitEmoji::encodeShortcode(file_get_contents(__DIR__ . '/UnicodeIpsum'));
$this->assertEquals(file_get_contents(__DIR__ . '/ShortcodeIpsum'), $text);
}

/**
 * removeEmoji() strips emoji but leaves the surrounding whitespace intact.
 */
public function testRemoveEmoji()
{
    $text = LitEmoji::removeEmoji('Some text 😊 including emoji 🚀');
    // NB: the removed smile emoji was surrounded by TWO spaces, and both remain.
    $this->assertEquals('Some text  including emoji ', $text);
}

public function testConfigExcludeShortcodes()
{
Expand Down

0 comments on commit b48e23c

Please sign in to comment.