Skip to content

Commit

Permalink
add pattern to find names within text
Browse files Browse the repository at this point in the history
  • Loading branch information
mcflugen committed Mar 3, 2024
1 parent 66f7890 commit 248a248
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion src/standard_names/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,25 @@
re.VERBOSE,
)

# Finds names of the form ``<part>__<part>`` embedded in running text.
#
# Each part starts with a lowercase letter and continues with alphanumeric
# runs joined by single ``-``, ``~`` or ``_`` separators. The separator inside
# the repeated group is mandatory (not optional) so that a run of letters can
# be consumed in exactly one way; an optional separator there makes the regex
# engine try every possible split of a long word and backtrack exponentially
# on text that contains no double underscore.
_PATTERN = re.compile(
    r"""
    (?<!\w)                 # Must not be preceded by a word character
    [a-z][a-zA-Z0-9]*       # Lowercase letter, then any alphanumerics
    (?:                     # Further runs of the first part:
        [-~_]               #   exactly one separator (hyphen, tilde, underscore)
        [a-zA-Z0-9]+        #   followed by one or more alphanumerics
    )*                      # Zero or more such runs
    __                      # Double underscore joining the two parts
    [a-z][a-zA-Z0-9]*       # Lowercase letter, then any alphanumerics
    (?:                     # Further runs of the second part:
        [-~_]               #   exactly one separator (hyphen, tilde, underscore)
        [a-zA-Z0-9]+        #   followed by one or more alphanumerics
    )*                      # Zero or more such runs
    (?=\W|$)                # Must be followed by a non-word character or the end
    """,
    re.VERBOSE,
)


def findall(line: str) -> list[str]:
    """Return all non-overlapping matches of ``_PATTERN`` in *line*.

    Leading and trailing whitespace is stripped from *line* before searching.

    Parameters
    ----------
    line : str
        Text to search.

    Returns
    -------
    list of str
        The matched substrings, in the order they occur; empty if there
        are none.
    """
    # Search with _PATTERN only: an earlier return on STANDARD_NAME_REGEX
    # would make this statement unreachable.
    return _PATTERN.findall(line.strip())

0 comments on commit 248a248

Please sign in to comment.