Merge pull request #16 from csdms/mcflugen/update-list

Update standard names list
csdms · Mar 3, 2024 · 12b3b60 · 12b3b60
2 parents 52cc7a3 + f391272
commit 12b3b60
Show file tree

Hide file tree

Showing 25 changed files with 7,406 additions and 1,150 deletions.
diff --git a/README.md b/README.md
@@ -1,22 +1,67 @@
-[![Test](https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg)](https://github.com/csdms/standard_names/actions/workflows/test.yml)
-[![Documentation Status](https://readthedocs.org/projects/standard-names/badge/?version=latest)](http://standard-names.readthedocs.io/en/latest/?badge=latest)
-[![Coverage Status](https://coveralls.io/repos/github/csdms/standard_names/badge.svg?branch=master)](https://coveralls.io/github/csdms/standard_names?branch=master)
-[![Conda Version](https://img.shields.io/conda/vn/conda-forge/standard_names.svg)](https://anaconda.org/conda-forge/standard_names)
-[![PyPI](https://img.shields.io/pypi/v/standard_names)](https://pypi.org/project/standard_names)
+[![Python][python-badge]][pypi-link]
+[![Build Status][build-badge]][build-link]
+[![PyPI][pypi-badge]][pypi-link]
+[![Conda Version][anaconda-badge]][anaconda-link]
 
 
-standard_names
-==============
+[anaconda-badge]: https://anaconda.org/conda-forge/standard_names/badges/version.svg
+[anaconda-link]: https://anaconda.org/conda-forge/standard_names
+[build-badge]: https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg
+[build-link]: https://github.com/csdms/standard_names/actions/workflows/test.yml
+[csdms-workbench]: https://csdms.colorado.edu/wiki/Workbench
+[pypi-badge]: https://badge.fury.io/py/standard_names.svg
+[pypi-link]: https://pypi.org/project/standard_names/
+[python-badge]: https://img.shields.io/pypi/pyversions/standard_names.svg
+
+# standard_names
 
 Python utilities for working with CSDMS Standard Names.
 
-CSDMS Standard Names is an element of the [CSDMS Workbench](https://csdms.colorado.edu/wiki/Workbench),
+CSDMS Standard Names is an element of the [CSDMS Workbench][csdms-workbench],
 an integrated system of software tools, technologies, and standards
 for building and coupling models.
 
+## As Regular Expression
+
+```
+^                           # Start of the object name
+[a-z]+                      # Starts with one or more lowercase letters
+(?:                         # Start of a non-capturing group for subsequent parts
+    [-~_]?                  # Optional separator: hyphen, tilde, or underscore
+    [a-zA-Z0-9]+            # One or more alphanumeric characters
+)*                          # Zero or more repetitions of the group
+__                          # Double underscore separator
+[a-z]+                      # Start of the quantity
+(?:                         # Start of a non-capturing group for subsequent parts
+    [-~_]?                  # Optional separator: hyphen, tilde, or underscore
+    [a-zA-Z0-9]+            # One or more alphanumeric characters
+)*                          # Zero or more repetitions of the group
+$                           # End of the name
+```
+
+## As Parsing Expression Grammar
+
+```peg
+Start
+    = LowercaseWord UnderscoreSeparator LowercaseWord
+
+LowercaseWord
+    = [a-z] AdditionalCharacters*
+
+AdditionalCharacters
+    = Separator? Alphanumeric+
+
+Separator
+    = "-" / "~" / "_"
+
+Alphanumeric
+    = [a-zA-Z0-9]
+
+UnderscoreSeparator
+    = "__"
+```
 
-Links
------
+# Links
 
 *  [Source code](http://github.com/csdms/standard_names): The
    *standard_names* source code repository.

diff --git a/noxfile.py b/noxfile.py
@@ -14,7 +14,7 @@
 @nox.session(python=PYTHON_VERSION, venv_backend="conda")
 def test(session: nox.Session) -> None:
     """Run the tests."""
-    session.install(".[testing]")
+    session.install(".[peg,testing]")
 
     args = ["--cov", PROJECT, "-vvv"] + session.posargs
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -38,7 +38,6 @@ dynamic = [
 ]
 dependencies = [
     "packaging",
-    "pyyaml",
 ]
 
 [project.license]
@@ -51,6 +50,9 @@ Issues = "https://github.com/csdms/standard_names/issues"
 Repository = "https://github.com/csdms/standard_names"
 
 [project.optional-dependencies]
+peg = [
+    "pyparsing",
+]
 dev = [
     "nox",
 ]
@@ -64,11 +66,7 @@ docs = [
 ]
 
 [project.scripts]
-snbuild = "standard_names.cmd.snbuild:run"
-sndump = "standard_names.cmd.sndump:run"
-snscrape = "standard_names.cmd.snscrape:run"
-snsql = "standard_names.cmd.snsql:run"
-snvalidate = "standard_names.cmd.snvalidate:run"
+# Console entry point; must name the same module that
+# src/standard_names/__main__.py imports (the package was renamed cmd -> cli).
+"standard-names" = "standard_names.cli.main:main"
 
 [build-system]
 requires = [

diff --git a/src/standard_names/__main__.py b/src/standard_names/__main__.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from standard_names.cli.main import main
+
+# Allow ``python -m standard_names``: run the CLI and use the value returned
+# by ``main`` as the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/standard_names/_format.py b/src/standard_names/_format.py
@@ -0,0 +1,117 @@
+from collections.abc import Iterable
+
+
+def as_wiki_list(
+    items: Iterable[str], heading: str | None = None, level: int = 1
+) -> str:
+    """Format items as wiki markup, one item per line inside a ``<tt>`` block.
+
+    Parameters
+    ----------
+    items : iterable of str
+        Items to format. Each item is stripped of surrounding whitespace
+        and followed by ``<br/>``.
+    heading : str, optional
+        If not ``None``, a heading line placed before the list.
+    level : int, optional
+        Number of ``=`` characters placed on each side of the heading.
+
+    Returns
+    -------
+    str
+        The formatted lines joined with newlines.
+
+    Examples
+    --------
+    >>> from standard_names._format import as_wiki_list
+
+    >>> print(as_wiki_list(["line 1", "line 2"], heading="Lines"))
+    = Lines =
+    <tt>
+    line 1<br/>
+    line 2<br/>
+    </tt>
+    """
+    lines: list[str] = []
+
+    if heading is not None:
+        marker = "=" * level
+        lines.append(f"{marker} {heading} {marker}")
+
+    lines.append("<tt>")
+    lines.extend(f"{item.strip()}<br/>" for item in items)
+    lines.append("</tt>")
+
+    return "\n".join(lines)
+
+
+def as_yaml_list(
+    items: Iterable[str], heading: str | None = None, level: int = 1
+) -> str:
+    """Format items as a YAML block sequence.
+
+    Parameters
+    ----------
+    items : iterable of str
+        Items to format. Each item is stripped of surrounding whitespace;
+        items that are empty after stripping are dropped.
+    heading : str, optional
+        If not ``None``, emitted as a ``heading:`` key with the items
+        indented by two spaces beneath it.
+    level : int, optional
+        Not used; accepted so that all formatters share one signature.
+
+    Returns
+    -------
+    str
+        The formatted lines joined with newlines. If no items remain,
+        the sequence is written as ``[]``.
+
+    Examples
+    --------
+    >>> from standard_names._format import as_yaml_list
+
+    >>> print(as_yaml_list(["line 1", "line 2"], heading="Lines"))
+    Lines:
+      - line 1
+      - line 2
+
+    >>> print(as_yaml_list([], heading="Lines"))
+    Lines:
+      []
+    """
+    if heading is None:
+        formatted_lines = []
+        indent = ""
+    else:
+        formatted_lines = [f"{heading}:"]
+        indent = "  "
+
+    stripped_items = [stripped for item in items if (stripped := item.strip())]
+
+    if stripped_items:
+        formatted_lines += [f"{indent}- {item}" for item in stripped_items]
+    else:
+        # An empty sequence still needs a value so the output stays valid YAML.
+        formatted_lines.append(f"{indent}[]")
+
+    return "\n".join(formatted_lines)
+
+
+def as_myst_list(
+    items: Iterable[str], heading: str | None = None, level: int = 1
+) -> str:
+    """Format items as a MyST (Markdown) bullet list.
+
+    Parameters
+    ----------
+    items : iterable of str
+        Items to format. Each item is stripped of surrounding whitespace;
+        items that are empty after stripping are dropped.
+    heading : str, optional
+        If given and non-empty, a heading line placed before the list.
+    level : int, optional
+        Heading depth, i.e. the number of ``#`` characters that prefix
+        the heading.
+
+    Returns
+    -------
+    str
+        The formatted lines joined with newlines.
+
+    Examples
+    --------
+    >>> from standard_names._format import as_myst_list
+
+    >>> print(as_myst_list(["line 1", "line 2"], heading="Lines"))
+    # Lines
+    * line 1
+    * line 2
+
+    >>> print(as_myst_list(["line 1"], heading="Lines", level=2))
+    ## Lines
+    * line 1
+    """
+    formatted_lines = [f"{'#' * level} {heading}"] if heading else []
+    formatted_lines += [
+        f"* {stripped}" for item in items if (stripped := item.strip())
+    ]
+
+    return "\n".join(formatted_lines)
+
+
+def as_text_list(
+    items: Iterable[str], heading: str | None = None, level: int = 1
+) -> str:
+    """Format items as plain text, one item per line.
+
+    Parameters
+    ----------
+    items : iterable of str
+        Items to format. Each item is stripped of surrounding whitespace;
+        items that are empty after stripping are dropped.
+    heading : str, optional
+        If given and non-empty, written verbatim as the first line.
+    level : int, optional
+        Not used; accepted so that all formatters share one signature.
+
+    Returns
+    -------
+    str
+        The formatted lines joined with newlines.
+
+    Examples
+    --------
+    >>> from standard_names._format import as_text_list
+
+    >>> print(as_text_list(["line 1", "line 2"], heading="# Lines"))
+    # Lines
+    line 1
+    line 2
+    """
+    lines = [heading] if heading else []
+
+    for item in items:
+        text = item.strip()
+        if text:
+            lines.append(text)
+
+    return "\n".join(lines)
+
+
+# Registry mapping an output-format name to the function in this module
+# that renders a list of items in that format.
+FORMATTERS = {
+    "wiki": as_wiki_list,
+    "yaml": as_yaml_list,
+    "text": as_text_list,
+    "myst": as_myst_list,
+}
diff --git a/src/standard_names/cmd/__init__.py → src/standard_names/cli/__init__.py b/src/standard_names/cmd/__init__.py → src/standard_names/cli/__init__.py
diff --git a/src/standard_names/cli/_scrape.py b/src/standard_names/cli/_scrape.py
@@ -0,0 +1,82 @@
+#! /usr/bin/env python
+"""
+Example usage:
+
+```bash
+snscrape http://csdms.colorado.edu/wiki/CSN_Quantity_Templates \
+    http://csdms.colorado.edu/wiki/CSN_Object_Templates \
+    http://csdms.colorado.edu/wiki/CSN_Operation_Templates \
+    > data/scraped.yaml
+```
+"""
+from __future__ import annotations
+
+from collections.abc import Iterable
+from urllib.request import urlopen
+
+from standard_names.registry import NamesRegistry
+
+
+def scrape_names(files: Iterable[str]) -> NamesRegistry:
+    """Collect the standard names found in a set of files or URLs.
+
+    Each entry is searched with ``search_file_for_names`` and the names
+    found are wrapped in a ``NamesRegistry`` that is folded into the
+    result with ``|=``.
+
+    Parameters
+    ----------
+    files : iterable of str
+        Paths or URLs to search for names.
+
+    Returns
+    -------
+    NamesRegistry
+        A registry of the names found across all of *files*.
+    """
+    merged = NamesRegistry([])
+
+    for source in files:
+        found = search_file_for_names(source)
+        merged |= NamesRegistry(found)
+
+    return merged
+
+
+def find_all_names(lines: Iterable[str], engine: str = "regex") -> set[str]:
+    """Find standard names in lines of text.
+
+    Parameters
+    ----------
+    lines : iterable of str
+        Lines to search. Each line is stripped of surrounding whitespace
+        before it is passed to the engine's ``findall``.
+    engine : {"regex", "peg"}, optional
+        Which matcher to use: ``standard_names.regex`` or
+        ``standard_names.peg``. The module is imported lazily, so only the
+        selected engine needs to be importable.
+
+    Returns
+    -------
+    set of str
+        Every name found in *lines*.
+
+    Raises
+    ------
+    ValueError
+        If *engine* is not one of ``"regex"`` or ``"peg"``.
+
+    Examples
+    --------
+    >>> from standard_names.cli._scrape import find_all_names
+
+    >>> contents = '''
+    ... A file with text and names (air__temperature) mixed in. Some names
+    ... have double underscores (like, Water__Temperature) but are not
+    ... valid names. Others, like water__temperature, or "wind__speed" are good.
+    ... '''
+    >>> sorted(find_all_names(contents.splitlines(), engine="regex"))
+    ['air__temperature', 'water__temperature', 'wind__speed']
+
+    >>> sorted(find_all_names(contents.splitlines(), engine="peg"))
+    ['air__temperature', 'water__temperature', 'wind__speed']
+    """
+    if engine == "regex":
+        from standard_names.regex import findall
+    elif engine == "peg":
+        from standard_names.peg import findall
+    else:
+        # Must be an f-string, otherwise the offending value is never shown.
+        raise ValueError(
+            f"engine not understood: {engine!r} is not one of 'regex', 'peg'"
+        )
+
+    names: set[str] = set()
+    for line in lines:
+        names.update(findall(line.strip()))
+
+    return names
+
+
+def search_file_for_names(path: str) -> set[str]:
+    """Search a local file or a web page for standard names.
+
+    Parameters
+    ----------
+    path : str
+        A filesystem path, or a URL beginning with ``http://`` or
+        ``https://``.
+
+    Returns
+    -------
+    set of str
+        The names found by ``find_all_names``.
+    """
+    if path.startswith(("http://", "https://")):
+        with urlopen(path) as response:
+            # The response yields bytes; decode each line before searching.
+            return find_all_names(line.decode("utf-8") for line in response)
+
+    # Read local files as UTF-8 as well, so that both sources are decoded
+    # the same way regardless of the platform's default encoding.
+    with open(path, encoding="utf-8") as fp:
+        return find_all_names(fp)
diff --git a/src/standard_names/cmd/snsql.py → src/standard_names/cli/_sql.py b/src/standard_names/cmd/snsql.py → src/standard_names/cli/_sql.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python
+from __future__ import annotations
 
 import os
 
@@ -46,7 +46,7 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
     Examples
     --------
     >>> from standard_names.registry import NamesRegistry
-    >>> from standard_names.cmd.snsql import as_sql_commands
+    >>> from standard_names.cli._sql import as_sql_commands
 
     >>> names = NamesRegistry()
     >>> names.add("air__temperature")
@@ -99,25 +99,3 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
         commands = newline.join(db.iterdump())
 
     return commands
-
-
-def main() -> str:
-    """
-    Build a database of CSDMS standard names from a list.
-    """
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description="Build an sqlite database from a list of names"
-    )
-    parser.add_argument(
-        "file", nargs="+", type=argparse.FileType("r"), help="List of names"
-    )
-    args = parser.parse_args()
-
-    names = NamesRegistry(args.file)
-    return as_sql_commands(names)
-
-
-def run() -> None:
-    print(main())