Skip to content

Commit

Permalink
[Str] Add the ability to supply string encoding (#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
azjezz committed Sep 27, 2020
1 parent c471b63 commit 25480b4
Show file tree
Hide file tree
Showing 41 changed files with 335 additions and 125 deletions.
1 change: 1 addition & 0 deletions src/Psl/Internal/Loader.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ final class Loader
'Psl\Internal\validate_offset',
'Psl\Internal\validate_offset_lower_bound',
'Psl\Internal\lazy_iterator',
'Psl\Internal\internal_encoding',
'Psl\Iter\all',
'Psl\Iter\any',
'Psl\Iter\apply',
Expand Down
26 changes: 26 additions & 0 deletions src/Psl/Internal/internal_encoding.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php

declare(strict_types=1);

namespace Psl\Internal;

use Psl;
use Psl\Type;
use Psl\Exception;

use function in_array;
use function mb_internal_encoding;
use function mb_list_encodings;

/**
 * Resolves the character encoding to pass to mbstring functions.
 *
 * When $encoding is supplied it is validated and returned unchanged.
 * When it is null, the value reported by mb_internal_encoding() is used,
 * falling back to 'UTF-8' if that call does not yield a string.
 *
 * NOTE(review): validation is a strict, exact match against the names returned
 * by mb_list_encodings(); a name not present in that list verbatim is rejected.
 * Confirm this is the intended contract for callers.
 *
 * @psalm-pure
 *
 * @psalm-suppress ImpureFunctionCall
 *
 * @throws Exception\InvariantViolationException If an invalid $encoding is provided.
 */
function internal_encoding(?string $encoding = null): string
{
    if (null !== $encoding) {
        // An explicit encoding must be one of the names mbstring lists.
        Psl\invariant(in_array($encoding, mb_list_encodings(), true), 'Invalid encoding.');

        return $encoding;
    }

    // No explicit encoding: defer to mbstring's configured internal encoding.
    $configured = mb_internal_encoding();
    if (Type\is_string($configured)) {
        return $configured;
    }

    return 'UTF-8';
}
12 changes: 9 additions & 3 deletions src/Psl/Str/capitalize.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use Psl\Exception;

/**
* Returns the string with the first character capitalized.
*
Expand All @@ -25,13 +27,17 @@
* => Str('1337')
*
* @psalm-pure
*
* @throws Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function capitalize(string $string): string
function capitalize(string $string, ?string $encoding = null): string
{
if ('' === $string) {
return '';
}

/** @psalm-suppress MissingThrowsDocblock - $offset is within-bounds */
return concat(uppercase(slice($string, 0, 1)), slice($string, 1, length($string)));
return concat(
uppercase(slice($string, 0, 1, $encoding), $encoding),
slice($string, 1, length($string, $encoding), $encoding)
);
}
13 changes: 11 additions & 2 deletions src/Psl/Str/capitalize_words.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@

namespace Psl\Str;

use Psl\Exception;
use Psl\Internal;

use function mb_convert_case;

use const MB_CASE_TITLE;

/**
* Returns the string with all words capitalized.
*
Expand All @@ -22,8 +29,10 @@
* => Str('مرحبا بكم')
*
* @psalm-pure
*
* @throws Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function capitalize_words(string $string): string
function capitalize_words(string $string, ?string $encoding = null): string
{
return \mb_convert_case($string, \MB_CASE_TITLE, encoding($string));
return mb_convert_case($string, MB_CASE_TITLE, Internal\internal_encoding($encoding));
}
9 changes: 7 additions & 2 deletions src/Psl/Str/chr.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
namespace Psl\Str;

use Psl;
use Psl\Internal;

use function mb_chr;

/**
* Return a specific character.
Expand All @@ -18,11 +21,13 @@
* => Str('ل')
*
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function chr(int $ascii): string
function chr(int $ascii, ?string $encoding = null): string
{
/** @var string|false $char */
$char = \mb_chr($ascii, 'UTF-8');
$char = mb_chr($ascii, Internal\internal_encoding($encoding));

/** @psalm-suppress MissingThrowsDocblock */
Psl\invariant(is_string($char), 'Unexpected Error.');
Expand Down
6 changes: 4 additions & 2 deletions src/Psl/Str/chunk.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace Psl\Str;

use Psl;
use Psl\Internal;

/**
* Returns an array containing the string split into chunks of the given size.
Expand Down Expand Up @@ -34,8 +35,9 @@
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If the given $chunk_size is less than 1 or above the limit (65535).
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function chunk(string $string, int $chunk_size = 1): array
function chunk(string $string, int $chunk_size = 1, ?string $encoding = null): array
{
Psl\invariant($chunk_size >= 1, 'Expected a non-negative chunk size.');
if ('' === $string) {
Expand All @@ -45,5 +47,5 @@ function chunk(string $string, int $chunk_size = 1): array
Psl\invariant(65535 >= $chunk_size, 'Maximum chunk length must not exceed 65535.');

/** @psalm-var list<string> */
return mb_str_split($string, $chunk_size, encoding($string));
return mb_str_split($string, $chunk_size, Internal\internal_encoding($encoding));
}
7 changes: 4 additions & 3 deletions src/Psl/Str/contains.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If the $offset is out-of-bounds.
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function contains(string $haystack, string $needle, int $offset = 0): bool
function contains(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): bool
{
if ('' === $needle) {
Psl\Internal\validate_offset($offset, length($haystack));
Psl\Internal\validate_offset($offset, length($haystack, $encoding));

return true;
}

return null !== search($haystack, $needle, $offset);
return null !== search($haystack, $needle, $offset, $encoding);
}
7 changes: 4 additions & 3 deletions src/Psl/Str/contains_ci.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,15 @@
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If the $offset is out-of-bounds.
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function contains_ci(string $haystack, string $needle, int $offset = 0): bool
function contains_ci(string $haystack, string $needle, int $offset = 0, ?string $encoding = null): bool
{
if ('' === $needle) {
Psl\Internal\validate_offset($offset, length($haystack));
Psl\Internal\validate_offset($offset, length($haystack, $encoding));

return true;
}

return null !== search_ci($haystack, $needle, $offset);
return null !== search_ci($haystack, $needle, $offset, $encoding);
}
10 changes: 8 additions & 2 deletions src/Psl/Str/encoding.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@

namespace Psl\Str;

use function mb_detect_encoding;

/**
* Detects the encoding of the given string.
*
* @psalm-return null|string The string encoding or null if unable to detect encoding.
*
* @psalm-pure
*/
function encoding(string $str): string
function encoding(string $string): ?string
{
return \mb_detect_encoding($str, null, true) ?: 'UTF-8';
return mb_detect_encoding($string, null, true) ?: null;
}
13 changes: 8 additions & 5 deletions src/Psl/Str/ends_with.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use Psl;

/**
* Returns whether the string ends with the given suffix.
*
Expand Down Expand Up @@ -31,21 +33,22 @@
* => Bool(false)
*
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function ends_with(string $string, string $suffix): bool
function ends_with(string $string, string $suffix, ?string $encoding = null): bool
{
if ($suffix === $string) {
return true;
}

$suffix_length = length($suffix);
$total_length = length($string);
$suffix_length = length($suffix, $encoding);
$total_length = length($string, $encoding);
if ($suffix_length > $total_length) {
return false;
}

/** @psalm-suppress MissingThrowsDocblock - we don't supply $offset */
$position = search_last($string, $suffix);
$position = search_last($string, $suffix, 0, $encoding);
if (null === $position) {
return false;
}
Expand Down
13 changes: 8 additions & 5 deletions src/Psl/Str/ends_with_ci.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use Psl;

/**
* Returns whether the string ends with the given suffix (case-insensitive).
*
Expand Down Expand Up @@ -31,21 +33,22 @@
* => Bool(false)
*
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function ends_with_ci(string $string, string $suffix): bool
function ends_with_ci(string $string, string $suffix, ?string $encoding = null): bool
{
if ($suffix === $string) {
return true;
}

$suffix_length = length($suffix);
$total_length = length($string);
$suffix_length = length($suffix, $encoding);
$total_length = length($string, $encoding);
if ($suffix_length > $total_length) {
return false;
}

/** @psalm-suppress MissingThrowsDocblock - we don't supply $offset */
$position = search_last_ci($string, $suffix);
$position = search_last_ci($string, $suffix, 0, $encoding);
if (null === $position) {
return false;
}
Expand Down
9 changes: 6 additions & 3 deletions src/Psl/Str/fold.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Psl\Str;

use Psl;
use Psl\Internal;

/**
Expand All @@ -15,12 +16,14 @@
* => Str('ss')
*
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function fold(string $str): string
function fold(string $str, ?string $encoding = null): string
{
foreach (Internal\CASE_FOLD as $k => $v) {
$str = replace($str, $k, $v);
$str = replace($str, $k, $v, $encoding);
}

return lowercase($str);
return lowercase($str, $encoding);
}
4 changes: 3 additions & 1 deletion src/Psl/Str/format.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use function vsprintf;

/**
* Return a formatted string.
*
Expand Down Expand Up @@ -35,5 +37,5 @@
*/
function format(string $format, ...$args): string
{
return \vsprintf($format, $args);
return vsprintf($format, $args);
}
4 changes: 3 additions & 1 deletion src/Psl/Str/format_number.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use function number_format;

/**
* Returns a string representation of the given number with grouped thousands.
*
Expand All @@ -19,5 +21,5 @@ function format_number(
string $decimal_point = '.',
string $thousands_separator = ','
): string {
return \number_format($number, $decimals, $decimal_point, $thousands_separator);
return number_format($number, $decimals, $decimal_point, $thousands_separator);
}
4 changes: 3 additions & 1 deletion src/Psl/Str/join.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace Psl\Str;

use function implode;

/**
* Join array elements with a string.
*
Expand All @@ -24,5 +26,5 @@
*/
function join(array $pieces, string $glue): string
{
return \implode($glue, $pieces);
return implode($glue, $pieces);
}
11 changes: 9 additions & 2 deletions src/Psl/Str/length.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

namespace Psl\Str;

use Psl;
use Psl\Internal;

use function mb_strlen;

/**
* Returns the length of the given string, i.e. the number of characters.
*
Expand All @@ -19,8 +24,10 @@
* => Int(4)
*
* @psalm-pure
*
* @throws Psl\Exception\InvariantViolationException If an invalid $encoding is provided.
*/
function length(string $str): int
function length(string $str, ?string $encoding = null): int
{
return \mb_strlen($str, encoding($str));
return mb_strlen($str, Internal\internal_encoding($encoding));
}
Loading

0 comments on commit 25480b4

Please sign in to comment.