Skip to content

Commit

Permalink
Implement C23 identifiers (minus normalization)
Browse files Browse the repository at this point in the history
  • Loading branch information
rikkimax committed Jun 11, 2023
1 parent f2b3b97 commit 4b8ffa0
Show file tree
Hide file tree
Showing 10 changed files with 3,147 additions and 290 deletions.
2 changes: 1 addition & 1 deletion compiler/src/build.d
Original file line number Diff line number Diff line change
Expand Up @@ -1577,7 +1577,7 @@ auto sourceFiles()
rootobject.d stringtable.d utf.d
"),
common: fileArray(env["COMMON"], "
bitfields.d file.d int128.d outbuffer.d string.d
bitfields.d file.d int128.d outbuffer.d string.d unicode.d unicode_tables.d
"),
commonHeaders: fileArray(env["COMMON"], "
outbuffer.h
Expand Down
97 changes: 97 additions & 0 deletions compiler/src/dmd/common/unicode.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* Functions related to Unicode handling
*
* Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
* Authors: $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
* License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
* Source: $(LINK2 https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/unicode.d, _unicode.d)
* Documentation: https://dlang.org/phobos/dmd_common_unicode.html
* Coverage: https://codecov.io/gh/dlang/dmd/src/master/compiler/src/dmd/common/unicode.d
*/
module dmd.common.unicode;
import dmd.common.unicode_tables;

@safe nothrow @nogc pure:

/**
 * Checks a code point against the `AlphaCharacterRanges` table.
 *
 * Params:
 *      c = code point to classify
 * Returns: `true` if `c` is a Unicode alpha (Lu, Ll, Lt, Lm, Lo)
 */
bool isUniAlpha(dchar c)
{
    const bool isAlpha = isInRange!AlphaCharacterRanges(c);
    return isAlpha;
}

///
unittest
{
    // spot-check a couple of alpha characters: one ASCII, one non-ASCII
    immutable dchar[2] letters = ['A', 'ğ'];
    foreach (const dchar letter; letters)
        assert(isUniAlpha(letter));
}

/**
 * Checks whether a code point may begin a UAX31 identifier.
 *
 * Params:
 *      c = code point to classify
 * Returns: `true` if `c` is a member of UAX31 Start
 */
bool isUAX31Start(dchar c)
{
    // The primary table is consulted first; `||` short-circuits, so the
    // legacy table is only searched when the primary lookup misses.
    return isInRange!Start_Starter(c)
        // @@@DEPRECATED_2.110@@@
        // remove maybe, 2.120?
        || isInRange!Start_Legacy_NonXID_Start(c);
}

///
unittest
{
    // an ASCII letter, a non-ASCII letter and the underscore must all be accepted
    immutable dchar[3] starters = ['A', 'ğ', '_'];
    foreach (const dchar starter; starters)
        assert(isUAX31Start(starter));
}

/**
 * Checks whether a code point may appear after the first character of a
 * UAX31 identifier.
 *
 * Params:
 *      c = code point to classify
 * Returns: `true` if `c` is a member of UAX31 Continue
 */
bool isUAX31Continue(dchar c)
{
    // Tables are searched in this order; `||` short-circuits, so later
    // tables are only consulted when the earlier lookups miss.
    return isInRange!Continue_Starter(c)
        // @@@DEPRECATED_2.110@@@
        // remove maybe, 2.120?
        || isInRange!Continue_NonStarter(c)
        // @@@DEPRECATED_2.110@@@
        // remove maybe, 2.120?
        || isInRange!Continue_Legacy_NonXID_Continue(c);
}

///
unittest
{
    // letters, the underscore and a digit must all be accepted
    immutable dchar[4] continuers = ['A', 'ğ', '_', '9'];
    foreach (const dchar continuer; continuers)
        assert(isUAX31Continue(continuer));
}

private:

// originally from dmd.root.utf
/**
 * Tests whether a code point falls inside any entry of a range table.
 *
 * Params:
 *      Ranges = table of `[first, last]` pairs, both ends inclusive. The
 *               binary search assumes the entries are sorted in ascending
 *               order and do not overlap.
 *      c = code point to look up
 * Returns: `true` if some entry satisfies `first <= c && c <= last`,
 *          `false` otherwise (always `false` for an empty table)
 */
bool isInRange(alias Ranges)(dchar c)
{
    // An empty table contains nothing. Without this guard `length - 1`
    // would wrap around and `Ranges[0]` would index out of bounds.
    if (Ranges.length == 0)
        return false;

    // Shortcut search if c is outside the span covered by the whole table
    if (c < Ranges[0][0] || Ranges[Ranges.length - 1][1] < c)
        return false;

    // Binary search over the half-open interval [low, high); using an
    // exclusive upper bound means no unsigned `mid - 1` is ever computed.
    size_t low = 0;
    size_t high = Ranges.length;
    while (low < high)
    {
        const size_t mid = low + ((high - low) >> 1);
        if (c < Ranges[mid][0])
            high = mid;
        else if (Ranges[mid][1] < c)
            low = mid + 1;
        else
            return true; // Ranges[mid][0] <= c <= Ranges[mid][1]
    }
    return false;
}

0 comments on commit 4b8ffa0

Please sign in to comment.