Support unicode identifiers

Fixes #1091
brian-team · Apr 23, 2024 · bfb2aad · bfb2aad
1 parent 222a47b
commit bfb2aad
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 22 deletions.
diff --git a/brian2/equations/equations.py b/brian2/equations/equations.py
@@ -20,6 +20,7 @@
     Suppress,
     Word,
     ZeroOrMore,
+    pyparsing_unicode,
     restOfLine,
 )
 
@@ -73,7 +74,7 @@
 # Note that the check_identifiers function later performs more checks, e.g.
 # names starting with underscore should only be used internally
 IDENTIFIER = Word(
-    string.ascii_letters + "_", string.ascii_letters + string.digits + "_"
+    pyparsing_unicode.alphas + "_", pyparsing_unicode.alphanums + "_"
 ).setResultsName("identifier")
 
 # very broad definition here, expression will be analysed by sympy anyway
@@ -139,10 +140,9 @@ def check_identifier_basic(identifier):
     Check an identifier (usually resulting from an equation string provided by
     the user) for conformity with the rules. The rules are:
 
-        1. Only ASCII characters
-        2. Starts with a character, then mix of alphanumerical characters and
+        1. Starts with a character, then mix of alphanumerical characters and
            underscore
-        3. Is not a reserved keyword of Python
+        2. Is not a reserved keyword of Python
 
     Parameters
     ----------

diff --git a/brian2/groups/neurongroup.py b/brian2/groups/neurongroup.py
@@ -3,12 +3,10 @@
 """
 
 import numbers
-import string
 from collections.abc import MutableMapping, Sequence
 
 import numpy as np
 import sympy
-from pyparsing import Word
 
 from brian2.codegen.translation import analyse_identifiers
 from brian2.core.preferences import prefs
@@ -21,6 +19,7 @@
 )
 from brian2.equations.equations import (
     DIFFERENTIAL_EQUATION,
+    IDENTIFIER,
     PARAMETER,
     SUBEXPRESSION,
     Equations,
@@ -51,11 +50,6 @@
 logger = get_logger(__name__)
 
 
-IDENTIFIER = Word(
-    f"{string.ascii_letters}_", f"{string.ascii_letters + string.digits}_"
-).setResultsName("identifier")
-
-
 def _valid_event_name(event_name):
     """
     Helper function to check whether a name is a valid name for an event.

diff --git a/brian2/parsing/statements.py b/brian2/parsing/statements.py
@@ -6,13 +6,14 @@
     Regex,
     Suppress,
     Word,
-    alphas,
-    nums,
+    pyparsing_unicode,
 )
 
 from brian2.utils.caching import cached
 
-VARIABLE = Word(f"{alphas}_", f"{alphas + nums}_").setResultsName("variable")
+VARIABLE = Word(
+    pyparsing_unicode.alphas + "_", pyparsing_unicode.alphanums + "_"
+).setResultsName("variable")
 
 OP = Regex(r"(\+|\-|\*|/|//|%|\*\*|>>|<<|&|\^|\|)?=").setResultsName("operation")
 EXPR = Combine(

diff --git a/brian2/tests/test_neurongroup.py b/brian2/tests/test_neurongroup.py
@@ -7,6 +7,7 @@
 
 from brian2.core.base import BrianObjectException
 from brian2.core.clocks import defaultclock
+from brian2.core.functions import implementation
 from brian2.core.magic import run
 from brian2.core.network import Network
 from brian2.core.preferences import prefs
@@ -19,7 +20,11 @@
 from brian2.synapses.synapses import Synapses
 from brian2.tests.utils import assert_allclose, exc_isinstance
 from brian2.units.allunits import second, volt
-from brian2.units.fundamentalunits import DimensionMismatchError, have_same_dimensions
+from brian2.units.fundamentalunits import (
+    DimensionMismatchError,
+    check_units,
+    have_same_dimensions,
+)
 from brian2.units.stdunits import Hz, ms, mV
 from brian2.units.unitsafefunctions import linspace
 from brian2.utils.logger import catch_logs
@@ -2217,6 +2222,41 @@ def test_semantics_mod():
     assert_allclose(G.y[:], float_values % 98)
 
 
+@pytest.mark.standalone_compatible
+def test_unicode_identifiers():
+    # Test support for unicode variable names, function names, and constants
+    τ = 10 * ms
+
+    @implementation(
+        "cpp",
+        """
+    double π_times(double x) {
+        return M_PI*x;
+    }
+    """,
+    )
+    @implementation(
+        "cython",
+        """
+    cdef double π_times(double x):
+        return M_PI*x
+    """,
+    )
+    @check_units(x=1 / second, result=1 / second)
+    def π_times(x):
+        return np.pi * x
+
+    eqs = """
+    dv/dt = -v / τ + π_times(σ) : 1
+    σ : 1/second"""
+    group = NeuronGroup(2, eqs)
+    group.σ = [1, 2] * Hz
+
+    run(5 * ms)
+    assert_allclose(group.v, [0.012361203888596196, 0.024722407777192392])
+    assert_equal(group.σ, [1, 2] * Hz)
+
+
 if __name__ == "__main__":
     test_set_states()
     test_creation()
@@ -2292,3 +2332,4 @@ def test_semantics_mod():
     test_semantics_floor_division()
     test_semantics_floating_point_division()
     test_semantics_mod()
+    test_unicode_identifiers()
diff --git a/brian2/utils/stringtools.py b/brian2/utils/stringtools.py
@@ -3,7 +3,6 @@
 """
 
 import re
-import string
 
 __all__ = [
     "indent",
@@ -18,6 +17,8 @@
     "SpellChecker",
 ]
 
+from pyparsing import pyparsing_unicode
+
 
 def indent(text, numtabs=1, spacespertab=4, tab=None):
     """
@@ -164,7 +165,7 @@ def get_identifiers(expr, include_numbers=False):
     """
     Return all the identifiers in a given string ``expr``, that is everything
     that matches a programming language variable like expression, which is
-    here implemented as the regexp ``\\b[A-Za-z_][A-Za-z0-9_]*\\b``.
+    here implemented as the regexp ``\\b(?:[^\\W\\d]|_)\\w*\\b``.
 
     Parameters
     ----------
@@ -180,15 +181,15 @@ def get_identifiers(expr, include_numbers=False):
 
     Examples
     --------
-    >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2)*17'
+    >>> expr = '3-a*_b+c5+8+f(A - .3e-10, tau_2, σ)*17'
     >>> ids = get_identifiers(expr)
     >>> print(sorted(list(ids)))
-    ['A', '_b', 'a', 'c5', 'f', 'tau_2']
+    ['A', '_b', 'a', 'c5', 'f', 'tau_2', 'σ']
     >>> ids = get_identifiers(expr, include_numbers=True)
     >>> print(sorted(list(ids)))
     ['.3e-10', '17', '3', '8', 'A', '_b', 'a', 'c5', 'f', 'tau_2', 'σ']
     """
-    identifiers = set(re.findall(r"\b[A-Za-z_][A-Za-z0-9_]*\b", expr))
+    identifiers = set(re.findall(r"\b(?:[^\W\d]|_)\w*\b", expr))
     if include_numbers:
         # only the number, not a + or -
         numbers = set(
@@ -286,10 +287,10 @@ class SpellChecker:
         The known words
     alphabet : iterable of str, optional
         The allowed characters. Defaults to the characters allowed for
-        identifiers, i.e. ascii characters, digits and the underscore.
+        identifiers, i.e. Unicode letters, digits and the underscore.
     """
 
-    def __init__(self, words, alphabet=f"{string.ascii_lowercase + string.digits}_"):
+    def __init__(self, words, alphabet=f"{pyparsing_unicode.alphanums}_"):
         self.words = words
         self.alphabet = alphabet