Skip to content

Commit

Permalink
added 'age' property
Browse files Browse the repository at this point in the history
  • Loading branch information
iwsfutcmd committed Jan 19, 2023
1 parent ed2af0d commit 4ba7cc1
Show file tree
Hide file tree
Showing 11 changed files with 5,272 additions and 4,420 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ target/
*14.0.0.*
*15.0.0.*
tests/data

# Clinic scripts
clinic*.py
cpp.py
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 15.0.0-2
- Add age property

## 15.0.0-1
- Add vertical orientation property

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Currently supported additional Unicode properties:
* Indic Syllabic Category: `indic_syllabic_category(chr)`
* Grapheme Cluster Break: `grapheme_cluster_break(chr)`
* Vertical Orientation: `vertical_orientation(chr)`
* Age: `age(chr)`
* Total Strokes (CJK): `total_strokes(chr)`
* Emoji: `is_emoji(chr)`
* Emoji Presentation: `is_emoji_presentation(chr)`
Expand Down
24 changes: 21 additions & 3 deletions makeunicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
INDIC_SYLLABIC_CATEGORY = "IndicSyllabicCategory%s.txt"
GRAPHEME_BREAK_PROPERTY = "auxiliary/GraphemeBreakProperty%s.txt"
VERTICAL_ORIENTATION_PROPERTY = "VerticalOrientation%s.txt"
AGE_PROPERTY = "DerivedAge%s.txt"
EMOJI_DATA = "emoji/emoji-data%s.txt"

# Private Use Areas -- in planes 1, 15, 16
Expand Down Expand Up @@ -977,7 +978,7 @@ def word_key(a):

def makeunicodeprop(unicode, trace):

dummy = (0, 0, 0, 0, 0, 0, 0)
dummy = (0, 0, 0, 0, 0, 0, 0, 0)
table = [dummy]
cache = {0: dummy}
index = [0] * len(unicode.chars)
Expand All @@ -996,7 +997,8 @@ def makeunicodeprop(unicode, trace):
indic_syllabic = unicode.indic_syllabic.index(record.indic_syllabic)
grapheme_cluster_break = unicode.grapheme_cluster_break.index(record.grapheme_cluster_break)
vertical_orientation = unicode.vertical_orientation.index(record.vertical_orientation)
item = (script, block, script_extensions, indic_positional, indic_syllabic, grapheme_cluster_break, vertical_orientation)
age = unicode.age.index(record.age)
item = (script, block, script_extensions, indic_positional, indic_syllabic, grapheme_cluster_break, vertical_orientation, age)
i = cache.get(item)
if i is None:
cache[item] = i = len(table)
Expand All @@ -1016,7 +1018,7 @@ def makeunicodeprop(unicode, trace):
fprint("/* a list of unique unicode property sets */")
fprint("static const _PyUnicodePlus_PropertySet _PyUnicodePlus_Property_Sets[] = {")
for item in table:
fprint(" {%d, %d, %d, %d, %d, %d, %d}," % item)
fprint(" {%d, %d, %d, %d, %d, %d, %d, %d}," % item)
fprint("};")
fprint()

Expand Down Expand Up @@ -1063,6 +1065,12 @@ def makeunicodeprop(unicode, trace):
fprint(" NULL")
fprint("};")

fprint("static const char *_PyUnicodePlus_AgeNames[] = {")
for name in unicode.age:
fprint(" \"%s\"," % name)
fprint(" NULL")
fprint("};")

# split property set index table
index1, index2, shift = splitbins(index, trace)

Expand Down Expand Up @@ -1521,6 +1529,16 @@ def __init__(self, version, cjk_check=True):
if table[i] is not None:
table[i].vertical_orientation = vertical_orientation[i]

age = ["Unassigned"] * 0x110000
for char, (ag, ) in UcdFile(AGE_PROPERTY, version).expanded():
age[char] = ag

self.age = ["Unassigned"] + sorted(set(age) - {"Unassigned"})

for i in range(0, 0x110000):
if table[i] is not None:
table[i].age = age[i]

for char, (p,) in UcdFile(EMOJI_DATA, version).expanded():
if table[char]:
table[char].binary_properties.add(p)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name="unicodedataplus",
version="15.0.0-1",
version="15.0.0-2",
description="Unicodedata with extensions for additional properties.",
ext_modules=[main_module],
author="Ben Yang",
Expand Down
18 changes: 18 additions & 0 deletions tests/test_unicodedataplus.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,24 @@ def test_vertical_orientation(self):
self.assertEqual(self.db.vertical_orientation('\U0001E040'), 'R')
self.assertEqual(self.db.vertical_orientation('\U0001F200'), 'Tu')

def test_age(self):
# Spot-check the Age property: each assertion pins one code point to the
# Unicode version string that self.db.age() is expected to return for it.
# NOTE(review): the samples skip several versions (e.g. 5.2, 6.x, 8.0, 9.0,
# 11.0, 12.1, 15.0) and never exercise the "Unassigned" result -- consider
# adding cases for those.
self.assertEqual(self.db.age('\u03DA'), '1.1')
self.assertEqual(self.db.age('\u20AB'), '2.0')
self.assertEqual(self.db.age('\u20AC'), '2.1')
self.assertEqual(self.db.age('\u058A'), '3.0')
self.assertEqual(self.db.age('\U00010423'), '3.1')
self.assertEqual(self.db.age('\u07B1'), '3.2')
self.assertEqual(self.db.age('\U00010083'), '4.0')
self.assertEqual(self.db.age('\u131F'), '4.1')
self.assertEqual(self.db.age('\U0001D363'), '5.0')
self.assertEqual(self.db.age('\uA95F'), '5.1')
self.assertEqual(self.db.age('\u0C34'), '7.0')
self.assertEqual(self.db.age('\U0001F6F8'), '10.0')
self.assertEqual(self.db.age('\u0EAC'), '12.0')
self.assertEqual(self.db.age('\U0002A6D9'), '13.0')
self.assertEqual(self.db.age('\u170D'), '14.0')


def test_total_strokes(self):
self.assertEqual(self.db.total_strokes('P'), 0)
self.assertEqual(self.db.total_strokes('\u694A'), 13)
Expand Down
32 changes: 31 additions & 1 deletion unicodedataplus/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ typedef struct {
_PyUnicodePlus_Grapheme_Cluster_Break */
const int vertical_orientation; /* index into
_PyUnicodePlus_Vertical_Orientation */

const int age; /* index into
_PyUnicodePlus_Age */
} _PyUnicodePlus_PropertySet;

typedef struct {
Expand All @@ -95,6 +96,7 @@ typedef struct change_record {
const unsigned char indic_syllabic_category_changed;
const unsigned char grapheme_cluster_break_changed;
const unsigned char vertical_orientation_changed;
const unsigned char age_changed;
const unsigned char total_strokes_changed;
} change_record;

Expand Down Expand Up @@ -707,6 +709,33 @@ unicodedata_UCD_vertical_orientation_impl(PyObject *self, int chr)
return PyUnicode_FromString(_PyUnicodePlus_VerticalOrientationNames[index]);
}

/*[clinic input]
unicodedata.UCD.age
self: self
chr: int(accept={str})
/
Returns the Age property of the character chr as string.
[clinic start generated code]*/

static PyObject *
unicodedata_UCD_age_impl(PyObject *self, int chr)
/*[clinic end generated code: output=65b9ca0dc56b5516 input=57aa81559ef3dc45]*/
{
    /* Look up the Age of chr and return its name as a new str object.
     *
     * The property set for the code point supplies an index into
     * _PyUnicodePlus_AgeNames.  When self passes UCD_Check(), the change
     * record fetched for the code point may override that index.
     */
    const Py_UCS4 code = (Py_UCS4)chr;
    int age_index = (int) _getpropset_ex(code)->age;

    if (UCD_Check(self)) {
        const change_record *delta = get_old_record(self, code);

        if (delta->category_changed == 0) {
            /* Treated as unassigned: use the first name-table entry. */
            age_index = 0;
        }
        else if (delta->age_changed != 0xFF) {
            /* Any value other than 0xFF replaces the current index. */
            age_index = delta->age_changed;
        }
    }

    return PyUnicode_FromString(_PyUnicodePlus_AgeNames[age_index]);
}

/*[clinic input]
unicodedata.UCD.total_strokes
Expand Down Expand Up @@ -1987,6 +2016,7 @@ static PyMethodDef unicodedata_functions[] = {
UNICODEDATA_UCD_INDIC_SYLLABIC_CATEGORY_METHODDEF
UNICODEDATA_UCD_GRAPHEME_CLUSTER_BREAK_METHODDEF
UNICODEDATA_UCD_VERTICAL_ORIENTATION_METHODDEF
UNICODEDATA_UCD_AGE_METHODDEF
UNICODEDATA_UCD_TOTAL_STROKES_METHODDEF
UNICODEDATA_UCD_DECOMPOSITION_METHODDEF
UNICODEDATA_UCD_NAME_METHODDEF
Expand Down
29 changes: 28 additions & 1 deletion unicodedataplus/unicodedata.c.37.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,33 @@ unicodedata_UCD_vertical_orientation(PyObject *self, PyObject *arg)
return return_value;
}

/* Docstring for UCD.age(); the leading "age($self, chr, /)" line followed by
 * "--" is the text-signature header, the remainder is the help text.
 * NOTE(review): this header appears to be Argument Clinic output (see the
 * clinic checksum trailer at the end of the file) -- presumably it should be
 * regenerated from the clinic input rather than edited by hand. */
PyDoc_STRVAR(unicodedata_UCD_age__doc__,
"age($self, chr, /)\n"
"--\n"
"\n"
"Returns the Age property of the character chr as string.");

/* Method-table entry for age(); METH_O means the wrapper receives exactly
 * one argument object. */
#define UNICODEDATA_UCD_AGE_METHODDEF \
{"age", (PyCFunction)unicodedata_UCD_age, METH_O, unicodedata_UCD_age__doc__},

/* Forward declaration of the implementation, which is defined outside this
 * header. */
static PyObject *
unicodedata_UCD_age_impl(PyObject *self, int chr);

/* Argument-parsing wrapper for age(): converts arg to a code point and
 * forwards it to unicodedata_UCD_age_impl().  Returns the implementation's
 * result, or NULL when argument parsing fails. */
static PyObject *
unicodedata_UCD_age(PyObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
int chr;

/* "C" accepts a str of length 1 and stores its code point as a C int;
 * the ":age" suffix names the function in error messages. */
if (!PyArg_Parse(arg, "C:age", &chr)) {
goto exit;
}
return_value = unicodedata_UCD_age_impl(self, chr);

/* Single exit point; return_value is still NULL if parsing failed. */
exit:
return return_value;
}

PyDoc_STRVAR(unicodedata_UCD_total_strokes__doc__,
"total_strokes($self, chr, /)\n"
"--\n"
Expand Down Expand Up @@ -791,4 +818,4 @@ unicodedata_UCD_is_extended_pictographic(PyObject *self, PyObject *arg)
exit:
return return_value;
}
/*[clinic end generated code: output=48287109f3866503 input=a9049054013a1b77]*/
/*[clinic end generated code: output=786b068a16dce3a5 input=a9049054013a1b77]*/
38 changes: 37 additions & 1 deletion unicodedataplus/unicodedata.c.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,42 @@ unicodedata_UCD_vertical_orientation(PyObject *self, PyObject *arg)
return return_value;
}

/* Docstring for UCD.age(); the leading "age($self, chr, /)" line followed by
 * "--" is the text-signature header, the remainder is the help text.
 * NOTE(review): this header appears to be Argument Clinic output (see the
 * clinic checksum trailer at the end of the file) -- presumably it should be
 * regenerated from the clinic input rather than edited by hand. */
PyDoc_STRVAR(unicodedata_UCD_age__doc__,
"age($self, chr, /)\n"
"--\n"
"\n"
"Returns the Age property of the character chr as string.");

/* Method-table entry for age(); METH_O means the wrapper receives exactly
 * one argument object. */
#define UNICODEDATA_UCD_AGE_METHODDEF \
{"age", (PyCFunction)unicodedata_UCD_age, METH_O, unicodedata_UCD_age__doc__},

/* Forward declaration of the implementation, which is defined outside this
 * header. */
static PyObject *
unicodedata_UCD_age_impl(PyObject *self, int chr);

/* Argument-parsing wrapper for age(): validates that arg is a str holding
 * exactly one character, extracts its code point and forwards it to
 * unicodedata_UCD_age_impl().  Returns the implementation's result, or NULL
 * when validation fails. */
static PyObject *
unicodedata_UCD_age(PyObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
int chr;

/* Reject anything that is not a str object. */
if (!PyUnicode_Check(arg)) {
_PyArg_BadArgument("age", "argument", "a unicode character", arg);
goto exit;
}
/* A non-zero result from PyUnicode_READY() signals failure. */
if (PyUnicode_READY(arg)) {
goto exit;
}
/* Only a string of length 1 is accepted. */
if (PyUnicode_GET_LENGTH(arg) != 1) {
_PyArg_BadArgument("age", "argument", "a unicode character", arg);
goto exit;
}
/* Read the single code point and hand it to the implementation. */
chr = PyUnicode_READ_CHAR(arg, 0);
return_value = unicodedata_UCD_age_impl(self, chr);

/* Single exit point; return_value is still NULL on any failure above. */
exit:
return return_value;
}

PyDoc_STRVAR(unicodedata_UCD_total_strokes__doc__,
"total_strokes($self, chr, /)\n"
"--\n"
Expand Down Expand Up @@ -1065,4 +1101,4 @@ unicodedata_UCD_is_extended_pictographic(PyObject *self, PyObject *arg)
exit:
return return_value;
}
/*[clinic end generated code: output=03097ed6016ce64a input=a9049054013a1b77]*/
/*[clinic end generated code: output=652110d3ec494e7a input=a9049054013a1b77]*/
28 changes: 27 additions & 1 deletion unicodedataplus/unicodedata.c.pypy.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,32 @@ unicodedata_UCD_vertical_orientation(PyObject *self, PyObject *arg)
return return_value;
}

/* Docstring for UCD.age(); the leading "age($self, chr, /)" line followed by
 * "--" is the text-signature header, the remainder is the help text.
 * NOTE(review): this header appears to be Argument Clinic output (see the
 * clinic checksum trailer at the end of the file) -- presumably it should be
 * regenerated from the clinic input rather than edited by hand. */
PyDoc_STRVAR(unicodedata_UCD_age__doc__,
"age($self, chr, /)\n"
"--\n"
"\n"
"Returns the Age property of the character chr as string.");

/* Method-table entry for age(); METH_O means the wrapper receives exactly
 * one argument object. */
#define UNICODEDATA_UCD_AGE_METHODDEF \
{"age", (PyCFunction)unicodedata_UCD_age, METH_O, unicodedata_UCD_age__doc__},

/* Forward declaration of the implementation, which is defined outside this
 * header. */
static PyObject *
unicodedata_UCD_age_impl(PyObject *self, int chr);

/* Argument-parsing wrapper for age(): converts arg to a code point and
 * forwards it to unicodedata_UCD_age_impl().  Returns the implementation's
 * result, or NULL when argument parsing fails. */
static PyObject *
unicodedata_UCD_age(PyObject *self, PyObject *arg)
{
PyObject *return_value = NULL;
int chr;

/* "C" accepts a str of length 1 and stores its code point as a C int;
 * the ":age" suffix names the function in error messages. */
if (!PyArg_Parse(arg, "C:age", &chr))
goto exit;
return_value = unicodedata_UCD_age_impl(self, chr);

/* Single exit point; return_value is still NULL if parsing failed. */
exit:
return return_value;
}

PyDoc_STRVAR(unicodedata_UCD_total_strokes__doc__,
"total_strokes($self, chr, /)\n"
"--\n"
Expand Down Expand Up @@ -764,4 +790,4 @@ unicodedata_UCD_is_extended_pictographic(PyObject *self, PyObject *arg)
exit:
return return_value;
}
/*[clinic end generated code: output=dd3db21bb6c026ae input=a9049054013a1b77]*/
/*[clinic end generated code: output=b84333aeb6f0ccb5 input=a9049054013a1b77]*/
Loading

0 comments on commit 4ba7cc1

Please sign in to comment.