Merge pull request #639 from biolink/add_poetry_target

add optional makefile target for generating pyproject.toml and a poetry environment from the existing requirements.txt file
biolink · Aug 12, 2023 · 81d5157 · 81d5157
2 parents 2ad6602 + 956046c
commit 81d5157
Show file tree

Hide file tree

Showing 10 changed files with 1,963 additions and 28 deletions.
diff --git a/.github/workflows/make-tests.yaml b/.github/workflows/make-tests.yaml
@@ -1,16 +1,26 @@
-name: refresh-readmes
-on: [push]
+on:
+  pull_request:
+    branches:
+      - master
 jobs:
-  run-make:
-    runs-on: ubuntu-20.04
+  # This workflow contains a single job called "build"
+  build:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python: [ "3.7", "3.8", "3.9", "3.10", "3.11" ]
+
+    # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
       - uses: actions/checkout@v2
         with:
           ref: ${{ github.head_ref }}
       - uses: actions/setup-python@v2
-        name: setup python
+        name: setup python environment
         with:
-          python-version: 3.6
+          python-version: ${{ matrix.python }}
       - name: Install Dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,10 @@ var/
 *.egg-info/
 .installed.cfg
 *.egg
+# keep poetry files out of the repo for now, until we decide if we are moving to a .toml file specification
+# for requirements in the future.
+pyproject.toml
+poetry.lock
 
 # PyInstaller
 #  Usually these files are written by a python script from a template

diff --git a/Makefile b/Makefile
@@ -61,3 +61,22 @@ nb:
 # used to make assoc_schema.py
 mm:
 	./bin/flask2marshmallow.pl ../biolink-api/biolink/datamodel/serializers.py
+
+# Optional poetry development environment: every run discards .venv, pyproject.toml and
+# poetry.lock and rebuilds them from requirements.txt.
+.PHONY: poetry poetry-test
+
+poetry:
+	poetry config virtualenvs.in-project true
+	rm -rf .venv
+	rm -f pyproject.toml
+	rm -f poetry.lock
+	poetry init --name "ontobio" --no-interaction
+	sed -i.bak 's/readme = "README\.md"/readme = "README\.rst"/' pyproject.toml
+	rm -f pyproject.toml.bak
+	poetry add $$( cat requirements.txt )
+	poetry install
+
+# run the travis_test target inside the poetry virtual environment
+poetry-test:
+	poetry run $(MAKE) travis_test
diff --git a/README-developers.md b/README-developers.md
@@ -54,3 +54,42 @@ python setup.py sdist bdist_wheel bdist_egg
 twine upload --repository-url https://upload.pypi.org/legacy/ --username PYPI_USERNAME dist/*
 ```
 
+****************************************************************************************************
+
+#### to use a poetry development environment
+
+1. create the pyproject.toml file and generate the .venv directory
+```bash
+make poetry
+```
+this command deletes any existing pyproject.toml and poetry.lock files as well as the .venv virtual environment
+if it finds one.  It then creates a new pyproject.toml file out of the requirements.txt file, 
+creates a .venv directory, and finally installs the dependencies into it.  This also creates a poetry.lock file.
+At the moment, the poetry.lock and pyproject.toml files are both in .gitignore so that the source of truth for
+the built environment is still requirements.txt.
+
+2. to recreate the poetry virtual environment, just run the same `make poetry` command again, or if you want to avoid
+reinstalling all the dependencies, just `rm -rf .venv` which will remove the local virtual environment and then
+run `poetry install` to install via the `poetry.lock` file created in step 1 above. 
+
+helpful poetry commands:
+```bash
+poetry install # install dependencies from poetry.lock
+poetry run <command> # run a command in the poetry virtual environment
+poetry env list # list all virtual environments for the project and mark the one currently in use
+poetry show --why --tree [pypi_package_name] # show the dependency tree for pypi_package_name
+poetry show [pypi_package_name] # show the version of pypi_package_name that is installed in the current venv.
+```
+
+If we move to a pyproject.toml file, poetry can manage both the dependencies and the virtual environment.
+For now requirements.txt remains the source of truth, so do not add/update/remove dependencies in pyproject.toml
+directly; the commands below change pyproject.toml and/or poetry.lock and are listed for future reference only.
+
+```bash
+poetry add <package> # add a package to the pyproject.toml file and install it in the virtual environment
+poetry remove <package> # remove a package from the pyproject.toml file and uninstall it from the virtual environment
+poetry update # update all packages in the pyproject.toml file and the poetry.lock file
+poetry update <package> # update the specified package in the pyproject.toml file and the poetry.lock file
+poetry lock --no-update # regenerate the poetry.lock file from pyproject.toml without upgrading packages that are
+# already locked -- used when editing the pyproject.toml file directly. 
+```
diff --git a/ontobio/rdfgen/gocamgen/gocam_builder.py b/ontobio/rdfgen/gocamgen/gocam_builder.py
@@ -44,7 +44,7 @@ class GoCamBuilder:
     def __init__(self, parser_config: AssocParserConfig, modelstate=None):
         self.config = parser_config
         self.aspector = GoAspector(self.config.ontology)
-        self.store = plugin.get('IOMemory', Store)()
+        self.store = plugin.get('Memory', Store)()
         self.errors = GeneErrorSet()  # Errors by gene ID
         self.gpi_entities = self.parse_gpi(parser_config.gpi_authority_path)
         self.modelstate = modelstate

diff --git a/ontobio/sparql/skos.py b/ontobio/sparql/skos.py
@@ -7,14 +7,32 @@
 from rdflib.namespace import SKOS
 from prefixcommons.curie_util import contract_uri
 
-from ontobio.ontol import Ontology, Synonym
+from ontobio.ontol import Ontology, Synonym, TextDefinition
 
 # TODO: make configurable
 GEMET = Namespace('http://www.eionet.europa.eu/gemet/2004/06/gemet-schema.rdf#')
 
 logger = logging.getLogger(__name__)
 
 
+def _preferred_label(rg, concept, lang=None):
+    """
+    Return a list of (skos:prefLabel, label) pairs for concept, or None when it has no matching label.
+
+    :param: rg: rdflib.Graph object
+    :param: concept: rdflib.URIRef object
+    :param: lang: optional language code; when given, only labels tagged with it are returned
+
+    """
+    labels = list(rg.objects(concept, SKOS.prefLabel))
+    if lang is not None:
+        labels = [l_ for l_ in labels if getattr(l_, "language", None) == lang]
+    if len(labels) == 0:
+        logger.warning("No labels for {}".format(concept))
+        return None
+    return [(SKOS.prefLabel, l_) for l_ in labels]
+
+
 class Skos(object):
     """
     SKOS is an RDF data model for representing thesauri and terminologies.
@@ -29,9 +47,9 @@ def __init__(self, prefixmap=None, lang='en'):
 
     def _uri2id(self, uri):
         s = "{:s}".format(str(uri))
-        for prefix,uribase in self.prefixmap.items():
-            if (s.startswith(uribase)):
-                s = s.replace(uribase,prefix+":")
+        for prefix, uribase in self.prefixmap.items():
+            if s.startswith(uribase):
+                s = s.replace(uribase, prefix+":")
                 return s
         curies = contract_uri(uri)
         if len(curies) > 0:
@@ -57,8 +75,8 @@ def process_rdfgraph(self, rg, ont=None):
 
         Arguments
         ---------
-        rg: rdflib.Graph
-            graph object
+        :param: rg: rdflib.Graph object
+        :param: ont: ontobio.ontol.Ontology object
 
         Returns
         -------
@@ -79,14 +97,13 @@ def process_rdfgraph(self, rg, ont=None):
                 subset_map[self._uri2id(s)] = s
 
         for concept in sorted(list(rg.subjects(RDF.type, SKOS.Concept))):
-            concept_uri = str(concept)
             id=self._uri2id(concept)
             logger.info("ADDING: {}".format(id))
-            ont.add_node(id, self._get_label(rg,concept))
+            ont.add_node(id, self._get_label(rg, concept))
 
             for defn in rg.objects(concept, SKOS.definition):
-                if (defn.language == self.lang):
-                    td = TextDefinition(id, escape_value(defn.value))
+                if defn.language == self.lang:
+                    td = TextDefinition(id, defn.value)
                     ont.add_text_definition(td)
 
             for s in rg.objects(concept, SKOS.broader):
@@ -112,12 +129,21 @@ def _get_schemes(self, rg, concept):
         schemes.update(rg.objects(concept, GEMET.group))
         return schemes
 
-    def _get_label(self, rg,concept):
-        labels = sorted(rg.preferredLabel(concept, lang=self.lang))
-        if len(labels) == 0:
+    def _get_label(self, rg, concept):
+        """
+        Return the value of the first (in sort order) skos:prefLabel of concept, or None if it has none.
+
+        :param: rg: rdflib.Graph object
+        :param: concept: rdflib.URIRef object
+        """
+        # Look the labels up once: _preferred_label() reports a missing label each time it is called.
+        pairs = _preferred_label(rg, concept)
+        labels = sorted(pairs) if pairs is not None else []
+        # NOTE(review): unlike the rg.preferredLabel(concept, lang=self.lang) call this replaces,
+        # the labels are not filtered by self.lang -- confirm whether that is intended.
+        if len(labels) == 0:
             return None
         if len(labels) > 1:
             logger.warning(">1 label for {} : {}".format(concept, labels))
         return labels[0][1].value
-
-
+