From 0db5acbb418dbba514f28fe0d90e0e0d4217ad55 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 21 Jul 2022 17:43:47 +0200 Subject: [PATCH 01/33] feat: Added spell checking github action for documentation --- .github/workflows/spellcheck.yml | 22 ++++++++ .spellcheck.yml | 30 +++++++++++ docs/spellcheck_exceptions.txt | 87 ++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 .github/workflows/spellcheck.yml create mode 100644 .spellcheck.yml create mode 100644 docs/spellcheck_exceptions.txt diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml new file mode 100644 index 0000000..8f565e7 --- /dev/null +++ b/.github/workflows/spellcheck.yml @@ -0,0 +1,22 @@ +# This is workflow for spell checking using PySpelling lib (https://pypi.org/project/pyspelling/) +name: Spellcheck +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events but only for the main branch + #push: + # branches: [ master,docs,develop ] + #pull_request: + # branches: [ master ] + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + spellcheck: + # The type of runner that the job will run on + runs-on: ubuntu-latest + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Spellcheck + - uses: actions/checkout@v2 + - uses: igsekor/pyspelling-any@v0.0.2 + name: Spellcheck diff --git a/.spellcheck.yml b/.spellcheck.yml new file mode 100644 index 0000000..984e0a2 --- /dev/null +++ b/.spellcheck.yml @@ -0,0 +1,30 @@ +spellchecker: aspell +matrix: + - name: Check the english docs + aspell: + lang: en + sources: + - docs/source/*/*.md + default_encoding: utf-8 + pipeline: + - pyspelling.filters.context: + context_visible_first: true + escapes: \\[\\`~] + delimiters: + # Ignore multiline content between fences (fences can have 3 or 
more back ticks) + # ``` + # content + # ``` + - open: '(?s)(?P<open> *`{3,})' + close: '(?P=open)' + # Ignore text between inline back ticks + - open: '(?P<open>`+)' + close: '(?P=open)' + # Ignore text in brackets [] and () + - open: '\[' + close: '\]' + - open: '\(' + close: '\)' + dictionary: + wordlists: + - docs/spellcheck_exceptions.txt diff --git a/docs/spellcheck_exceptions.txt b/docs/spellcheck_exceptions.txt new file mode 100644 index 0000000..4a7ba9a --- /dev/null +++ b/docs/spellcheck_exceptions.txt @@ -0,0 +1,87 @@ +auth +APIs +BashActivator +CPython +CShellActivator +Dex +DSL +EmptyDirVolumeSource +FishActivator +FromAppData +GCP +Github +IAP +IfNotPresent +JupyterLab +KFP +Kedro +Kubeflow +Kubernetes +LDAP +MLFlow +MLMD +Mlflow +NoSchedule +OpenID +Pipelnes +Posix +PowerShellActivator +Preprequisites +PythonActivator +Quickstart +ReadWriteMany +ReadWriteOnce +SDK +UI +VM +XonshActivator +YAML +backoff +behaviour +cls +conf +config +cpu +cron +datasets +dev +dir +env +faac +getindata +github +gpu +http +https +init +io +kedro +kfp +kfpclient +kubeflow +mario +mlflow +num +nvidia +oauth +oidc +packageable +params +pilicy +quickstart +readthedocs +scalable +setuptools +shm +src +storageclass +templated +tensorflow +ttl +txt +url +venv +versionable +virtualenv +wikipedia +yaml From 47f164055c2e2edd8879f765186e6c79f721a09a Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 21 Jul 2022 17:44:49 +0200 Subject: [PATCH 02/33] docs: Removed spelling mistakes as reported by new workflow --- docs/source/01_introduction/01_intro.md | 2 +- docs/source/02_installation/01_installation.md | 4 ++-- docs/source/03_getting_started/01_quickstart.md | 8 ++++---- docs/source/03_getting_started/02_gcp.md | 6 +++--- docs/source/03_getting_started/03_mlflow.md | 4 ++-- .../source/03_getting_started/04_continuous_deployment.md | 6 ++++-- docs/source/03_getting_started/05_authentication.md | 4 ++-- 7 files changed, 18 insertions(+), 16 deletions(-) diff --git 
a/docs/source/01_introduction/01_intro.md b/docs/source/01_introduction/01_intro.md index ea49749..c8e3ca1 100644 --- a/docs/source/01_introduction/01_intro.md +++ b/docs/source/01_introduction/01_intro.md @@ -17,5 +17,5 @@ docker images that cover not only the source itself, but also all the libraries entire execution environment. Portability is also one of the key Kedro qualities, as the pipelines must be versionable and packageable. Kedro, with [Kedro-docker](https://github.com/quantumblacklabs/kedro-docker) plugin does a fantastic -job to achieve this and Kubeflow looks like a nice addon to run the pipelines +job to achieve this and Kubeflow looks like a nice add-on to run the pipelines on powerful remote Kubernetes clusters. diff --git a/docs/source/02_installation/01_installation.md b/docs/source/02_installation/01_installation.md index e63be8c..6e57ada 100644 --- a/docs/source/02_installation/01_installation.md +++ b/docs/source/02_installation/01_installation.md @@ -12,7 +12,7 @@ $ pip install 'kedro<0.17' ## Plugin installation -### Install from PyPI +### Install from `PyPI` You can install ``kedro-kubeflow`` plugin from ``PyPi`` with `pip`: @@ -30,7 +30,7 @@ pip install git+https://github.com/getindata/kedro-kubeflow.git@develop ## Available commands -You can check available commands by going into project directory and runnning: +You can check available commands by going into project directory and running: ```console $ kedro kubeflow diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index e0d5f07..de8f6fa 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -2,7 +2,7 @@ ## Preprequisites -The quickstart assumes user have access to Kubeflow Pipelines deployment. Pipelines can be dedployed on any Kubernetes cluster, including [local cluster](https://www.kubeflow.org/docs/pipelines/installation/localcluster-deployment/). 
+The quickstart assumes user have access to Kubeflow Pipelines deployment. Pipelines can be deployed on any Kubernetes cluster, including [local cluster](https://www.kubeflow.org/docs/pipelines/installation/localcluster-deployment/). ## Install the toy project with Kubeflow Pipelines support @@ -18,7 +18,7 @@ created virtual environment CPython3.8.5.final.0-64 in 145ms $ source venv-demo/bin/activate ``` -Then, `kedro` must be present to enable cloning the starter project, along with the latest version of `kedro-kubeflow` plugina and kedro-docker (required to build docker images with the Kedro pipeline nodes): +Then, `kedro` must be present to enable cloning the starter project, along with the latest version of `kedro-kubeflow` plugin and `kedro-docker` (required to build docker images with the Kedro pipeline nodes): ``` $ pip install 'kedro<0.18' kedro-kubeflow kedro-docker @@ -89,13 +89,13 @@ First, initialize the project with `kedro-docker` configuration by running: $ kedro docker init ``` -This command creates a several files, including `.dockerignore`. This file ensures that transient files are not included in the docker image and it requires small adjustment. Open it in your favourite text editor and extend the section `# except the following` by adding there: +This command creates a several files, including `.dockerignore`. This file ensures that transient files are not included in the docker image and it requires small adjustment. Open it in your favorite text editor and extend the section `# except the following` by adding there: ```console !data/01_raw ``` -This change enforces raw data existence in the image. Also, one of the limitations of running the Kedro pipeline on Kubeflow (and not on local environemt) is inability to use MemoryDataSets, as the pipeline nodes do not share memory, so every artifact should be stored as file. 
The `spaceflights` demo configures four datasets as in-memory, so let's change the behaviour by adding these lines to `conf/base/catalog.yml`: +This change enforces raw data existence in the image. Also, one of the limitations of running the Kedro pipeline on Kubeflow (and not on local environemt) is inability to use `MemoryDataSets`, as the pipeline nodes do not share memory, so every artifact should be stored as file. The `spaceflights` demo configures four datasets as in-memory, so let's change the behaviour by adding these lines to `conf/base/catalog.yml`: ```console X_train: diff --git a/docs/source/03_getting_started/02_gcp.md b/docs/source/03_getting_started/02_gcp.md index 6f8a326..d1c48b7 100644 --- a/docs/source/03_getting_started/02_gcp.md +++ b/docs/source/03_getting_started/02_gcp.md @@ -9,7 +9,7 @@ tasks with use of Kubeflow based components. an easy way to manage and host JupyterLab based data science workbench environment. What we've found out is that the default images provided by a service cause some dependency conflicts. To avoid this issues make sure you use isolated virtual -environment, e.g. [virtualenv](https://pypi.org/project/virtualenv/). New virual +environment, e.g. [virtualenv](https://pypi.org/project/virtualenv/). New virtual environment can be created by simply invoking `python -m virtualenv venv` command. ### Using `kedro-kubeflow` with AI Platform Pipelines @@ -20,7 +20,7 @@ on new or existing Google Kubernetes Engine clusters. In general `kedro-kubeflow` plugin should work with AI Platform Pipelines out of the box, with the only exception is that it requires authentication. Note that the `host` -variable should point to a dashbard URL generated by AI Platform Pipelines service +variable should point to a dashboard URL generated by AI Platform Pipelines service (e.g. 
https://653hddae86eb7b0-dot-europe-west1.pipelines.googleusercontent.com/), just open the dashboard from the [service page](https://console.cloud.google.com/ai-platform/pipelines/clusters) and copy url from the browser. @@ -56,5 +56,5 @@ cloud application-default login`. ### Using `kedro-kubeflow` with Vertex AI Pipelines (DEPRECATED) -Vertex AI Pipelines support in `kedro-kubeflow` has been deprecated in favour of the +Vertex AI Pipelines support in `kedro-kubeflow` has been deprecated in favor of the new plugin [kedro-vertexai](https://kedro-vertexai.readthedocs.io/en/latest/) diff --git a/docs/source/03_getting_started/03_mlflow.md b/docs/source/03_getting_started/03_mlflow.md index b29aeed..a60aac0 100644 --- a/docs/source/03_getting_started/03_mlflow.md +++ b/docs/source/03_getting_started/03_mlflow.md @@ -19,7 +19,7 @@ Then, adjust the kedro-mlflow configuration and point to the mlflow server by ed $ kedro docker build ``` -And re-push the image to the remote registry. Finally, reupload the pipeline: +And re-push the image to the remote registry. Finally, re-upload the pipeline: ```console $ kedro kubeflow upload-pipeline @@ -37,4 +37,4 @@ Finally, start the pipeline. While it executes, the new Mlflow run is started an ![Mlflow UI](mlflow_ui.png) -The UI presents the pipeline stauts (in form of the icon) and latest node that was run (for failed runs in indicates at what step did the pipeline fail). Also, the `kubeflow_run_id` tag can be used to correlate Mlflow run with the Kubeflow pipeline execution. +The UI presents the pipeline status (in form of the icon) and latest node that was run (for failed runs in indicates at what step did the pipeline fail). Also, the `kubeflow_run_id` tag can be used to correlate Mlflow run with the Kubeflow pipeline execution. 
diff --git a/docs/source/03_getting_started/04_continuous_deployment.md b/docs/source/03_getting_started/04_continuous_deployment.md index a3a8699..d78c4d1 100644 --- a/docs/source/03_getting_started/04_continuous_deployment.md +++ b/docs/source/03_getting_started/04_continuous_deployment.md @@ -2,12 +2,12 @@ With kedro pipelines started on the remote Kubeflow Pipelnes clusters, changes in the code require re-building docker images and (sometimes) changing the pipeline structure. To simplify this workflow, Kedro-kubeflow plugin is capable of creating configuration for the most popular CI/CD automation tools. -The autogenerated configuration defines these actions: +The auto-generated configuration defines these actions: * on any new push to the repository - image is re-built and the pipeline is started using `run-once`, * on merge to master - image is re-built, the pipeline is registered in the Pipelines and scheduled to execute on the daily basis. -The behaviour and parameters (like schedule expression) can be adjusted by editing the genrated files. The configuration assumes that Google Container Registry is used to store the images, but users can freely adapt it to any (private or public) docker images registry. +The behaviour and parameters (like schedule expression) can be adjusted by editing the generated files. The configuration assumes that Google Container Registry is used to store the images, but users can freely adapt it to any (private or public) docker images registry. ## Github Actions @@ -19,6 +19,8 @@ If the Kedro project is stored on github (either in private or public repository Next, re-configure the project using +``` kedro kubeflow init --with-github-actions https://.endpoints..cloud.goog/pipelines +``` This command will generate Github Actions in `.github/workflows` directory. Then push the code to any branch and go to "Actions" tab in Github interface. 
diff --git a/docs/source/03_getting_started/05_authentication.md b/docs/source/03_getting_started/05_authentication.md index 8bb1ad7..adce5cc 100644 --- a/docs/source/03_getting_started/05_authentication.md +++ b/docs/source/03_getting_started/05_authentication.md @@ -6,13 +6,13 @@ Plugin supports 2 ways of authenticating to Kubeflow Pipelines API: It's already described in [GCP AI Platform support](02_gcp.md) chapter. -## 2. KFP behind Dex+authservice +## 2. KFP behind Dex with `authservice` Dex is the recommended authentication mechanism for on-premise Kubeflow clusters. The usual setup looks in a way that: * [oidc-autheservice](https://github.com/arrikto/oidc-authservice) redirect unauthenticated users to Dex, * [Dex](https://github.com/dexidp/dex) authenticates user in remote system, like LDAP or OpenID and also acts as OpenID provider, -* oidc-autheservice asks Dex for token and creates the session used across entire Kubeflow. +* `oidc-autheservice` asks Dex for token and creates the session used across entire Kubeflow. In order to use `kedro-kubeflow` behind Dex-secured clusters, use the following manual: From 323a7c2fabef451c038cc16373c7a026c13b922a Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 21 Jul 2022 20:30:11 +0200 Subject: [PATCH 03/33] docs: Added extra steps to quickstart that were missing --- docs/source/03_getting_started/01_quickstart.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index de8f6fa..7930313 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -54,6 +54,17 @@ Change directory to the project generated in /home/mario/kedro/kubeflow-plugin-d A best-practice setup includes initialising git and creating a virtual environment before running `kedro install` to install project-specific dependencies. 
Refer to the Kedro documentation: https://kedro.readthedocs.io/ ``` +There are some adjustments that need to be made to run starter Kedro project on Kubeflow. We need to replace all dots in names of catalog with other characters as Kubeflow does not accept dots in names. You can do it by using sed in starter root directory: +```console +for i in {1..10}; do sed -r 's/^([^ \t])([^. ]*)\./\1\2_/g' conf/base/catalog.yml > conf/base/catalog.yml; done +``` + +Before installing the dependencies, add the `kedro-kubeflow` and `kedro-docker` to `requirements.*` in src: +```console +echo kedro-kubeflow >> src/requirements* # TODO - add explicite version +echo kedro-docker >> src/requirements* # TODO - add explicite version based on package requirements +``` + Finally, go the demo project directory and ensure that kedro-kubeflow plugin is activated: ```console From b20dd40eba99a1a2b78d37e6223b6b434ae197b3 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 21 Jul 2022 20:31:10 +0200 Subject: [PATCH 04/33] docs: Added entry stub about contributing guidelines and local testing --- docs/source/04_contributing/01_guideline.md | 27 +++++++++++ .../04_contributing/02_local_testing.md | 45 +++++++++++++++++++ docs/source/04_contributing/index.rst | 11 +++++ 3 files changed, 83 insertions(+) create mode 100644 docs/source/04_contributing/01_guideline.md create mode 100644 docs/source/04_contributing/02_local_testing.md create mode 100644 docs/source/04_contributing/index.rst diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md new file mode 100644 index 0000000..9b59e69 --- /dev/null +++ b/docs/source/04_contributing/01_guideline.md @@ -0,0 +1,27 @@ +# Contributing guidelines + +Everyone is welcome to contribute to the code of this plugin, however we have some automation and guidelines you should get familiar with first. 
+ +There are few things that you should know about our workflow: +- All changes should be made as pull requests to develop branch. +- On release versions from develop branch are tagged and pulled to the master branch. +- For commits we follow [angular commit messages guideline](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit). + +## Updating the documentation + +For documentation updates we have `docs/Makefile` that runs `Sphinx` to update the `readthedocs`. + +There is automation using github actions that checks the spelling of the docs. [Links](), `blocks escaped with back ticks` \`...\` are omitted. In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. + +## Github hooks + +For linting and keeping code clean we use pre-commit package to join with github hooks. Use it by doing: + +```bash +pip install pre-commit +pre-commit install +``` + +## Releasing new versions + +TBD diff --git a/docs/source/04_contributing/02_local_testing.md b/docs/source/04_contributing/02_local_testing.md new file mode 100644 index 0000000..6a996bc --- /dev/null +++ b/docs/source/04_contributing/02_local_testing.md @@ -0,0 +1,45 @@ +# Local testing + +## Unit tests + +The plugin has unit tests that can be run with `tox`: +```console +pip install tox-pip-version +tox -v -e py38 +``` + +You can also run them manually by executing `python -m unittest` in the root folder. They are also executed with github action on pull requests to test the stability of new changes. See `.github/workflows/python-package.yml`. + +## E2E tests + +There is also a set up with Kubeflow running on team-maintained Google Cloud Platform. It tests the execution on said Kubeflow platform with `spaceflight` kedro starter. They are also automated with github action. See `.github/workflows/e2e-tests.yml`. 
+ +## Local cluster testing + +If you have enough RAM, there is also an option to test locally with running [Kubernetes in docker (kind)](https://getindata.com/blog/kubeflow-pipelines-running-5-minutes/). After going through that guideline you should have Kubeflow up and running available at `http://localhost:9000`. + +There are few differences from (quickstart)[#Quickstart]. For `kedro init` use the `http://localhost:9000` as an endpoint. + +The kind has its own docker registry that you need to upload the image to. However, since it does not have any connection to other registry we want to prevent it from trying to pull any image. In order to do that, we need to tag the built docker image with specific version. + +Locate your image name (it should be the same as kedro project name) with: +``` +docker images +``` + +Then tag your image with the following command (the version is arbitrary and can be any other version): +``` +docker tag :latest :1.0 +``` + +Then you need to upload the image from local registry to the kind registry. Here `kfp` is the cluster name, the same as used in guide in the link above. Default cluster name is `kind`. +``` +kind load docker-image :1.0 --name kfp +``` + +Lastly, in order to run a job or a schedule, you need to specify the image with image version, i.e: +``` +kedro kubeflow run-once -i docker.io/library/:1.0 +``` + +With that you should be able to test the plugin end to end with local sandbox Kubeflow cluster. \ No newline at end of file diff --git a/docs/source/04_contributing/index.rst b/docs/source/04_contributing/index.rst new file mode 100644 index 0000000..1270849 --- /dev/null +++ b/docs/source/04_contributing/index.rst @@ -0,0 +1,11 @@ +Getting started +=============== + +.. 
toctree:: + :maxdepth: 4 + + Quickstart <01_quickstart.md> + Google Cloud Platform support <02_gcp.md> + Kedro-Mlflow integration <03_mlflow.md> + Continuous Deployment <04_continuous_deployment.md> + Authenticating to Kubeflow Pipelines API <05_authentication.md> From 1552ed3d83aba43f8c44498f549d4a08951e6eb0 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 21 Jul 2022 20:33:08 +0200 Subject: [PATCH 05/33] fixup! docs: Added extra steps to quickstart that were missing --- docs/source/03_getting_started/01_quickstart.md | 2 +- docs/spellcheck_exceptions.txt | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index 7930313..e61672a 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -54,7 +54,7 @@ Change directory to the project generated in /home/mario/kedro/kubeflow-plugin-d A best-practice setup includes initialising git and creating a virtual environment before running `kedro install` to install project-specific dependencies. Refer to the Kedro documentation: https://kedro.readthedocs.io/ ``` -There are some adjustments that need to be made to run starter Kedro project on Kubeflow. We need to replace all dots in names of catalog with other characters as Kubeflow does not accept dots in names. You can do it by using sed in starter root directory: +There are some adjustments that need to be made to run starter Kedro project on Kubeflow. We need to replace all dots in names of catalog with other characters as Kubeflow does not accept dots in names. You can do it by using `sed` in starter root directory: ```console for i in {1..10}; do sed -r 's/^([^ \t])([^. 
]*)\./\1\2_/g' conf/base/catalog.yml > conf/base/catalog.yml; done ``` diff --git a/docs/spellcheck_exceptions.txt b/docs/spellcheck_exceptions.txt index 4a7ba9a..39333a6 100644 --- a/docs/spellcheck_exceptions.txt +++ b/docs/spellcheck_exceptions.txt @@ -1,10 +1,9 @@ -auth APIs BashActivator CPython CShellActivator -Dex DSL +Dex EmptyDirVolumeSource FishActivator FromAppData @@ -32,10 +31,12 @@ Quickstart ReadWriteMany ReadWriteOnce SDK +TBD UI VM XonshActivator YAML +auth backoff behaviour cls @@ -68,6 +69,7 @@ oidc packageable params pilicy +pre quickstart readthedocs scalable From adfa9c3d89cb481dda7edb46096c7d44f7508739 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 22 Jul 2022 10:18:58 +0200 Subject: [PATCH 06/33] docs: added dictionaries generated by pyspelling to ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 8192175..f7ae76a 100644 --- a/.gitignore +++ b/.gitignore @@ -123,4 +123,7 @@ venv.bak/ # mypy .mypy_cache/ +# pyspelling +dictionary.dic + docs/_build From fcb0faf0d5d8e1a97668e40caf692b4d8f3f8ccf Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 22 Jul 2022 10:22:09 +0200 Subject: [PATCH 07/33] fixup! docs: Added entry stub about contributing guidelines and local testing --- docs/index.rst | 1 + docs/source/04_contributing/index.rst | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index a691c03..75c270e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,7 @@ Welcome to Kedro Kubeflow Plugin's documentation! Introduction Installation Getting Started + Contributing Indices and tables ================== diff --git a/docs/source/04_contributing/index.rst b/docs/source/04_contributing/index.rst index 1270849..6df24b3 100644 --- a/docs/source/04_contributing/index.rst +++ b/docs/source/04_contributing/index.rst @@ -1,11 +1,8 @@ -Getting started +Contributing =============== .. 
toctree:: :maxdepth: 4 - Quickstart <01_quickstart.md> - Google Cloud Platform support <02_gcp.md> - Kedro-Mlflow integration <03_mlflow.md> - Continuous Deployment <04_continuous_deployment.md> - Authenticating to Kubeflow Pipelines API <05_authentication.md> + Contributing guideline <01_guideline.md> + Local testing <02_local_testing.md> From 804693d8a8861fe97b6e160bbeb5348586104ae2 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 22 Jul 2022 10:28:06 +0200 Subject: [PATCH 08/33] fixup! feat: Added spell checking github action for documentation --- .github/workflows/spellcheck.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 8f565e7..1af5ecf 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -3,10 +3,12 @@ name: Spellcheck # Controls when the action will run. on: # Triggers the workflow on push or pull request events but only for the main branch - #push: - # branches: [ master,docs,develop ] - #pull_request: - # branches: [ master ] + push: + branches: [ master,develop ] + paths: "docs/**" + pull_request: + branches: [ master ] + paths: "docs/**" # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # A workflow run is made up of one or more jobs that can run sequentially or in parallel @@ -19,4 +21,4 @@ jobs: # Spellcheck - uses: actions/checkout@v2 - uses: igsekor/pyspelling-any@v0.0.2 - name: Spellcheck + name: Spellcheck \ No newline at end of file From f90599b06342b47fb18f68d675ed1388557ce8e8 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 22 Jul 2022 10:31:27 +0200 Subject: [PATCH 09/33] fixup! 
feat: Added spell checking github action for documentation --- .github/workflows/spellcheck.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 1af5ecf..8e57a56 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -4,10 +4,8 @@ name: Spellcheck on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ master,develop ] paths: "docs/**" pull_request: - branches: [ master ] paths: "docs/**" # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From 6e24480bcf68f4e659cd9274fdbb05200d95f566 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Tue, 26 Jul 2022 10:49:12 +0200 Subject: [PATCH 10/33] refactor: changed spellchecking source image The new image uses the same tool and is more actively maintained --- .github/workflows/spellcheck.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 8e57a56..991f8d7 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -18,5 +18,5 @@ jobs: steps: # Spellcheck - uses: actions/checkout@v2 - - uses: igsekor/pyspelling-any@v0.0.2 - name: Spellcheck \ No newline at end of file + - uses: rojopolis/spellcheck-github-actions@0.25.0 + name: Spellcheck From 5809738aab2cd434feb726595ab3d7ebf60a9255 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Tue, 26 Jul 2022 17:17:31 +0200 Subject: [PATCH 11/33] docs: Updating the guide according to local testing, wip; testing note boxes on rtd --- .../03_getting_started/01_quickstart.md | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index e61672a..dad0036 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ 
b/docs/source/03_getting_started/01_quickstart.md @@ -24,10 +24,10 @@ Then, `kedro` must be present to enable cloning the starter project, along with $ pip install 'kedro<0.18' kedro-kubeflow kedro-docker ``` -With the dependencies in place, let's create a new project: +With the dependencies in place, let's create a new project (with the latest supported kedro version - 0.17.7): ``` -$ kedro new --starter=spaceflights +$ kedro new --starter=spaceflights --checkout=0.17.7 Project Name: ============= @@ -54,22 +54,20 @@ Change directory to the project generated in /home/mario/kedro/kubeflow-plugin-d A best-practice setup includes initialising git and creating a virtual environment before running `kedro install` to install project-specific dependencies. Refer to the Kedro documentation: https://kedro.readthedocs.io/ ``` -There are some adjustments that need to be made to run starter Kedro project on Kubeflow. We need to replace all dots in names of catalog with other characters as Kubeflow does not accept dots in names. You can do it by using `sed` in starter root directory: +Next go the demo project directory: ```console -for i in {1..10}; do sed -r 's/^([^ \t])([^. ]*)\./\1\2_/g' conf/base/catalog.yml > conf/base/catalog.yml; done +$ cd kubeflow-plugin-demo/ ``` Before installing the dependencies, add the `kedro-kubeflow` and `kedro-docker` to `requirements.*` in src: ```console -echo kedro-kubeflow >> src/requirements* # TODO - add explicite version -echo kedro-docker >> src/requirements* # TODO - add explicite version based on package requirements +$ echo kedro-kubeflow >> src/requirements* ``` -Finally, go the demo project directory and ensure that kedro-kubeflow plugin is activated: +Finally, ensure that kedro-kubeflow plugin is activated: ```console -$ cd kubeflow-plugin-demo/ -$ kedro install +$ pip install -r src/requirements.txt (...) Requirements installed! 
$ kedro kubeflow --help @@ -139,8 +137,8 @@ kedro docker build When execution finishes, your docker image is ready. If you don't use local cluster, you should push the image to the remote repository: ```console -docker tag kubeflow_plugin_demo:latest remote.repo.url.com/kubeflow_plugin_demo:latest -docker push remote.repo.url.com/kubeflow_plugin_demo:latest +docker tag kubeflow-plugin-demo:latest remote.repo.url.com/kubeflow-plugin-demo:latest +docker push remote.repo.url.com/kubeflow-plugin-demo:latest ``` ## Run the pipeline on Kubeflow @@ -150,11 +148,19 @@ First, run `init` script to create the sample configuration. A parameter value s ```console kedro kubeflow init https://kubeflow.cluster.com (...) -Configuration generated in /home/mario/kedro/kubeflow-plugin-demo/conf/base/kubeflow.yaml +Configuration generated in /home/user/kedro/kubeflow-plugin-demo/conf/base/kubeflow.yaml ``` +````{note} +Since kedro 0.17 there have been introduced namespaces to datasets. If you wish to experiment on newer versions, consider the following in this step: The namespaces feature has not been tested yet with the plugin and there are some errors with dots in kfp artifacts names, so for now it's best to disable it by adding the following line in `conf/base/kubeflow.yaml`: +```yaml +store_kedro_outputs_as_kfp_artifacts: False +``` +```` + Then, if needed, adjust the `conf/base/kubeflow.yaml`. For example, the `image:` key should point to the full image name (like `remote.repo.url.com/kubeflow_plugin_demo:latest` if you pushed the image at this name). Depending on the storage classes availability in Kubernetes cluster, you may want to modify `volume.storageclass` and `volume.access_modes` (please consult with Kubernetes admin what values should be there). + Finally, everything is set to run the pipeline on Kubeflow. 
Run `upload-pipeline`: ```console From 67e4f4f36f6364a49ac6a0e069a7177983452fed Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 11:23:13 +0200 Subject: [PATCH 12/33] build: Swapped recommonmark to myst Recommonmark is no longer supported and points to myst as replacement: https://recommonmark.readthedocs.io/en/latest/ --- docs/conf.py | 6 +++--- docs/requirements.txt | 1 + setup.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 44b4c91..75ee2e3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,7 +43,7 @@ # "sphinx.ext.ifconfig", # "sphinx.ext.viewcode", # "sphinx.ext.mathjax", - "recommonmark", + "myst_parser", "sphinx_rtd_theme", ] @@ -74,8 +74,8 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +# html_static_path = ["_static"] -language = None +language = "en" pygments_style = "sphinx" diff --git a/docs/requirements.txt b/docs/requirements.txt index efa7407..beefe52 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ sphinx sphinx_rtd_theme recommonmark +myst-parser diff --git a/setup.py b/setup.py index bf8ba05..6ccf90a 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ ], "docs": [ "sphinx==5.0.2", - "recommonmark==0.7.1", "sphinx_rtd_theme==1.0.0", + "myst-parser==0.18.0", ], "gcp": [ "google-auth<3", From 1b0c4274a1adcec407796c6fd3b6078d2a6df1a8 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 14:09:34 +0200 Subject: [PATCH 13/33] docs: Fix syntax and added info about docs syntax --- docs/source/04_contributing/01_guideline.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md index 9b59e69..43d27f5 
100644 --- a/docs/source/04_contributing/01_guideline.md +++ b/docs/source/04_contributing/01_guideline.md @@ -13,13 +13,15 @@ For documentation updates we have `docs/Makefile` that runs `Sphinx` to update t There is automation using github actions that checks the spelling of the docs. [Links](), `blocks escaped with back ticks` \`...\` are omitted. In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. +For documentation syntax, we mainly use Markdown with [myst](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) parser. + ## Github hooks For linting and keeping code clean we use pre-commit package to join with github hooks. Use it by doing: -```bash -pip install pre-commit -pre-commit install +```console +$ pip install pre-commit +$ pre-commit install ``` ## Releasing new versions From d98af197b1ae07957df047dddd813252ce699252 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 14:13:12 +0200 Subject: [PATCH 14/33] docs: Updated guideline syntax and completed the steps needed for env setup Moved local testing as enriched text admonitions to guide with tips about local cluster testing. --- .../03_getting_started/01_quickstart.md | 170 ++++++++++++++++-- .../04_contributing/02_local_testing.md | 37 +--- 2 files changed, 155 insertions(+), 52 deletions(-) diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index dad0036..5fb93e1 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -1,9 +1,14 @@ # Quickstart -## Preprequisites +(prerequisites-tag)= +## Prerequisites The quickstart assumes user have access to Kubeflow Pipelines deployment. Pipelines can be deployed on any Kubernetes cluster, including [local cluster](https://www.kubeflow.org/docs/pipelines/installation/localcluster-deployment/). 
+````{admonition} Local kubeflow cluster +There is also an option to test locally with running [Kubernetes in docker](https://getindata.com/blog/kubeflow-pipelines-running-5-minutes/) (kind). After going through that guide you should have Kubeflow up and running available at `http://localhost:9000`. +```` + ## Install the toy project with Kubeflow Pipelines support It is a good practice to start by creating a new virtualenv before installing new packages. Therefore, use `virtalenv` command to create new env and activate it: @@ -20,13 +25,13 @@ $ source venv-demo/bin/activate Then, `kedro` must be present to enable cloning the starter project, along with the latest version of `kedro-kubeflow` plugin and `kedro-docker` (required to build docker images with the Kedro pipeline nodes): -``` +```console $ pip install 'kedro<0.18' kedro-kubeflow kedro-docker ``` With the dependencies in place, let's create a new project (with the latest supported kedro version - 0.17.7): -``` +```console $ kedro new --starter=spaceflights --checkout=0.17.7 Project Name: @@ -59,7 +64,7 @@ Next go the demo project directory: $ cd kubeflow-plugin-demo/ ``` -Before installing the dependencies, add the `kedro-kubeflow` and `kedro-docker` to `requirements.*` in src: +Before installing the dependencies, add the `kedro-kubeflow` to `requirements.*` in src: ```console $ echo kedro-kubeflow >> src/requirements* ``` @@ -94,72 +99,192 @@ Commands: First, initialize the project with `kedro-docker` configuration by running: -``` +```console $ kedro docker init ``` This command creates a several files, including `.dockerignore`. This file ensures that transient files are not included in the docker image and it requires small adjustment. Open it in your favorite text editor and extend the section `# except the following` by adding there: ```console -!data/01_raw +$ echo !data/01_raw >> .dockerignore ``` -This change enforces raw data existence in the image. 
Also, one of the limitations of running the Kedro pipeline on Kubeflow (and not on local environemt) is inability to use `MemoryDataSets`, as the pipeline nodes do not share memory, so every artifact should be stored as file. The `spaceflights` demo configures four datasets as in-memory, so let's change the behaviour by adding these lines to `conf/base/catalog.yml`: +This change enforces raw data existence in the image. Also, one of the limitations of running the Kedro pipeline on Kubeflow (and not on local environemt) is inability to use `MemoryDataSets`, as the pipeline nodes do not share memory, so every artifact and intermediate data step should be stored as a file. The `spaceflights` demo configures four datasets as in-memory, so we need to change that. Replace the `conf/base/catalog.yml` with the following: -```console -X_train: +```yaml +companies: + type: pandas.CSVDataSet + filepath: data/01_raw/companies.csv + layer: raw + +reviews: + type: pandas.CSVDataSet + filepath: data/01_raw/reviews.csv + layer: raw + +shuttles: + type: pandas.ExcelDataSet + filepath: data/01_raw/shuttles.xlsx + layer: raw + load_args: + engine: openpyxl + +data_processing.preprocessed_companies: + type: pandas.ParquetDataSet + filepath: data/02_intermediate/preprocessed_companies.pq + layer: intermediate + +data_processing.preprocessed_shuttles: + type: pandas.ParquetDataSet + filepath: data/02_intermediate/preprocessed_shuttles.pq + layer: intermediate + +model_input_table: + type: pandas.ParquetDataSet + filepath: data/03_primary/model_input_table.pq + layer: primary + +data_science.active_modelling_pipeline.regressor: + type: pickle.PickleDataSet + filepath: data/06_models/regressor_active.pickle + versioned: true + layer: models + +data_science.candidate_modelling_pipeline.regressor: + type: pickle.PickleDataSet + filepath: data/06_models/regressor_candidate.pickle + versioned: true + layer: models + +data_science.active_modelling_pipeline.X_train: type: pickle.PickleDataSet 
filepath: data/05_model_input/X_train.pickle layer: model_input -y_train: +data_science.active_modelling_pipeline.y_train: type: pickle.PickleDataSet filepath: data/05_model_input/y_train.pickle layer: model_input -X_test: +data_science.active_modelling_pipeline.X_test: type: pickle.PickleDataSet filepath: data/05_model_input/X_test.pickle layer: model_input -y_test: +data_science.active_modelling_pipeline.y_test: type: pickle.PickleDataSet filepath: data/05_model_input/y_test.pickle layer: model_input + +data_science.active_modelling_pipeline.regressor: + type: pickle.PickleDataSet + filepath: data/06_models/regressor.pickle + versioned: true + layer: models + +data_science.candidate_modelling_pipeline.X_train: + type: pickle.PickleDataSet + filepath: data/05_model_input/X_train.pickle + layer: model_input + +data_science.candidate_modelling_pipeline.y_train: + type: pickle.PickleDataSet + filepath: data/05_model_input/y_train.pickle + layer: model_input + +data_science.candidate_modelling_pipeline.X_test: + type: pickle.PickleDataSet + filepath: data/05_model_input/X_test.pickle + layer: model_input + +data_science.candidate_modelling_pipeline.y_test: + type: pickle.PickleDataSet + filepath: data/05_model_input/y_test.pickle + layer: model_input + +data_science.candidate_modelling_pipeline.regressor: + type: pickle.PickleDataSet + filepath: data/06_models/regressor.pickle + versioned: true + layer: models ``` Finally, build the image: ```console -kedro docker build +$ kedro docker build ``` When execution finishes, your docker image is ready. 
If you don't use local cluster, you should push the image to the remote repository: ```console -docker tag kubeflow-plugin-demo:latest remote.repo.url.com/kubeflow-plugin-demo:latest -docker push remote.repo.url.com/kubeflow-plugin-demo:latest +$ docker tag kubeflow-plugin-demo:latest remote.repo.url.com/kubeflow-plugin-demo:latest +$ docker push remote.repo.url.com/kubeflow-plugin-demo:latest +``` + +````{admonition} Local cluster testing +The `kind` has its own docker registry that you need to upload the image to. However, since it does not have any connection to other registry we want to prevent it from trying to pull any image ([see the docs](https://kind.sigs.k8s.io/docs/user/quick-start/#loading-an-image-into-your-cluster)). In order to do that, we need to tag the built docker image with any specific version. Let's use `demo` tag, as any tag other than `latest` will do. + +Locate your image name (it should be the same as kedro project name) with: +```console +$ docker images ``` +Then tag your image with the following command: +```console +$ docker tag :latest :demo +``` + +Then you need to upload the image from local registry to the kind registry. Here `kfp` is the cluster name (the same as [in linked guide](prerequisites-tag)). Default cluster name is `kind`. +```console +$ kind load docker-image :demo --name kfp +``` +```` + ## Run the pipeline on Kubeflow First, run `init` script to create the sample configuration. A parameter value should reflect the kubeflow base path **as seen from the system** (so no internal Kubernetes IP unless you run the local cluster): ```console -kedro kubeflow init https://kubeflow.cluster.com +$ kedro kubeflow init https://kubeflow.cluster.com (...) Configuration generated in /home/user/kedro/kubeflow-plugin-demo/conf/base/kubeflow.yaml ``` -````{note} -Since kedro 0.17 there have been introduced namespaces to datasets. 
If you wish to experiment on newer versions, consider the following in this step: The namespaces feature has not been tested yet with the plugin and there are some errors with dots in kfp artifacts names, so for now it's best to disable it by adding the following line in `conf/base/kubeflow.yaml`: +````{admonition} Local cluster testing +For local cluster the link is the following: `http://localhost:9000` +```` + + +````{warning} +Since kedro 0.17 there have been introduced namespaces to datasets which are not yet fully supported by this plugin as it causes issues within naming conventions of kfp artifacts. For now it's best to disable storage of kfp artifacts by adding/uncommenting the following line in `conf/base/kubeflow.yaml`: ```yaml store_kedro_outputs_as_kfp_artifacts: False ``` ```` -Then, if needed, adjust the `conf/base/kubeflow.yaml`. For example, the `image:` key should point to the full image name (like `remote.repo.url.com/kubeflow_plugin_demo:latest` if you pushed the image at this name). Depending on the storage classes availability in Kubernetes cluster, you may want to modify `volume.storageclass` and `volume.access_modes` (please consult with Kubernetes admin what values should be there). +Then, if needed, adjust the `conf/base/kubeflow.yaml`. For example, the `image:` key should point to the full image name (like `remote.repo.url.com/kubeflow_plugin_demo:latest` if you've pushed the image at this name). Depending on the storage classes availability in Kubernetes cluster, you may want to modify `volume.storageclass` and `volume.access_modes` (please consult with Kubernetes admin what values should be there). + +````{admonition} Local cluster testing +In this example you also need to update the tag of the `image:` part to also use `demo` instead of `latest`. +```` + +````{warning} +The default limits of resources for node execution are too small for this example. Increase the following: +```yaml + resources: + (...) 
+ __default__: + cpu: 200m + memory: 64Mi +``` +like that +```yaml + cpu: 1 + memory: 1Gi +``` +```` Finally, everything is set to run the pipeline on Kubeflow. Run `upload-pipeline`: @@ -183,6 +308,13 @@ By using `Create run` button you can start a run of the pipeline on the cluster. ![Pipeline run](pipeline_run.gif) +````{tip} +You can also schedule a single run by using +```console +$ kedro kubeflow run-once +``` +```` + From the UI you can access the logs of the execution. If everything seems fine, use `schedule to create a recurring run: ```console diff --git a/docs/source/04_contributing/02_local_testing.md b/docs/source/04_contributing/02_local_testing.md index 6a996bc..8ee49be 100644 --- a/docs/source/04_contributing/02_local_testing.md +++ b/docs/source/04_contributing/02_local_testing.md @@ -1,45 +1,16 @@ +(local-testing)= # Local testing ## Unit tests The plugin has unit tests that can be run with `tox`: ```console -pip install tox-pip-version -tox -v -e py38 +$ pip install tox-pip-version +$ tox -v -e py38 ``` You can also run them manually by executing `python -m unittest` in the root folder. They are also executed with github action on pull requests to test the stability of new changes. See `.github/workflows/python-package.yml`. ## E2E tests -There is also a set up with Kubeflow running on team-maintained Google Cloud Platform. It tests the execution on said Kubeflow platform with `spaceflight` kedro starter. They are also automated with github action. See `.github/workflows/e2e-tests.yml`. - -## Local cluster testing - -If you have enough RAM, there is also an option to test locally with running [Kubernetes in docker (kind)](https://getindata.com/blog/kubeflow-pipelines-running-5-minutes/). After going through that guideline you should have Kubeflow up and running available at `http://localhost:9000`. - -There are few differences from (quickstart)[#Quickstart]. For `kedro init` use the `http://localhost:9000` as an endpoint. 
- -The kind has its own docker registry that you need to upload the image to. However, since it does not have any connection to other registry we want to prevent it from trying to pull any image. In order to do that, we need to tag the built docker image with specific version. - -Locate your image name (it should be the same as kedro project name) with: -``` -docker images -``` - -Then tag your image with the following command (the version is arbitrary and can be any other version): -``` -docker tag :latest :1.0 -``` - -Then you need to upload the image from local registry to the kind registry. Here `kfp` is the cluster name, the same as used in guide in the link above. Default cluster name is `kind`. -``` -kind load docker-image :1.0 --name kfp -``` - -Lastly, in order to run a job or a schedule, you need to specify the image with image version, i.e: -``` -kedro kubeflow run-once -i docker.io/library/:1.0 -``` - -With that you should be able to test the plugin end to end with local sandbox Kubeflow cluster. \ No newline at end of file +There is also a set up with Kubeflow running on team-maintained Google Cloud Platform. It tests the execution on said Kubeflow platform with `spaceflight` kedro starter. They are also automated with github action. See `.github/workflows/e2e-tests.yml`. 
\ No newline at end of file From 147347c500a0428c0c58275ee1528f78f7d776c2 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 14:30:16 +0200 Subject: [PATCH 15/33] docs: Fixed typos and spellcheck config to be compatible with myst --- .spellcheck.yml | 4 ++-- docs/source/03_getting_started/01_quickstart.md | 4 ++-- docs/source/04_contributing/01_guideline.md | 10 ++++++++-- docs/spellcheck_exceptions.txt | 1 + 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.spellcheck.yml b/.spellcheck.yml index 984e0a2..8cce72d 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -15,10 +15,10 @@ matrix: # ``` # content # ``` - - open: '(?s)(?P *`{3,})' + - open: '^(?s)(?P`{1,3})[^`]' close: '(?P=open)' # Ignore text between inline back ticks - - open: '(?P`+)' + - open: '(?P`)[^`]' close: '(?P=open)' # Ignore text in brackets [] and () - open: '\[' diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index 5fb93e1..0d5cff8 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -257,7 +257,7 @@ For local cluster the link is the following: `http://localhost:9000` ````{warning} -Since kedro 0.17 there have been introduced namespaces to datasets which are not yet fully supported by this plugin as it causes issues within naming conventions of kfp artifacts. For now it's best to disable storage of kfp artifacts by adding/uncommenting the following line in `conf/base/kubeflow.yaml`: +Since kedro 0.17 there have been introduced name spaces to datasets which are not yet fully supported by this plugin as it causes issues within naming conventions of kfp artifacts. 
For now it's best to disable storage of kfp artifacts by adding/uncommenting the following line in `conf/base/kubeflow.yaml`: ```yaml store_kedro_outputs_as_kfp_artifacts: False ``` @@ -312,7 +312,7 @@ By using `Create run` button you can start a run of the pipeline on the cluster. You can also schedule a single run by using ```console $ kedro kubeflow run-once -``` +``` ```` From the UI you can access the logs of the execution. If everything seems fine, use `schedule to create a recurring run: diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md index 43d27f5..a1355ea 100644 --- a/docs/source/04_contributing/01_guideline.md +++ b/docs/source/04_contributing/01_guideline.md @@ -11,9 +11,15 @@ There are few things that you should know about our workflow: For documentation updates we have `docs/Makefile` that runs `Sphinx` to update the `readthedocs`. -There is automation using github actions that checks the spelling of the docs. [Links](), `blocks escaped with back ticks` \`...\` are omitted. In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. +There is automation using github actions that checks the spelling of the docs. [Links](), `inline blocks escaped with back ticks` \`...\` and +``` +triple backtick blocks +``` +are omitted. + + In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. -For documentation syntax, we mainly use Markdown with [myst](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) parser. +For documentation syntax, we use Markdown with [myst](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) parser. 
## Github hooks diff --git a/docs/spellcheck_exceptions.txt b/docs/spellcheck_exceptions.txt index 39333a6..eb57d57 100644 --- a/docs/spellcheck_exceptions.txt +++ b/docs/spellcheck_exceptions.txt @@ -81,6 +81,7 @@ templated tensorflow ttl txt +uncommenting url venv versionable From 7482b4e69d87b723f73521584be9d25589137c35 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 14:37:32 +0200 Subject: [PATCH 16/33] build: Removed on push spellcheck trigger As per @mwiewior's suggestion to limit the runs --- .github/workflows/spellcheck.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 991f8d7..d03465a 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -2,9 +2,7 @@ name: Spellcheck # Controls when the action will run. on: - # Triggers the workflow on push or pull request events but only for the main branch - push: - paths: "docs/**" + # Triggers the workflow on pull request events only if docs files have changed pull_request: paths: "docs/**" # Allows you to run this workflow manually from the Actions tab From be5a9e39c2a48ea5e4e642e401e1baf3a4809718 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 27 Jul 2022 14:54:31 +0200 Subject: [PATCH 17/33] docs: Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc3b83c..2c7519a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ # Changelog ## [Unreleased] +- Changed sphinx markdown engine to myst +- Added CI for spellchecking the documentation with configuration for myst +- Updated documentation quickstart to workaround known issues and make it work on local kind cluster +- Updated documentation - added contributing guidelines and setup tips - Added --wait-for-completion and --timeout for `kedro kubeflow run-once` command - Added e2e tests github action for pull requests with kubeflow 
setup in gcp - Added support for extra volumes per node From b0dbd3a9bd948e03029278731ccaa23832df68b4 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 28 Jul 2022 18:24:36 +0200 Subject: [PATCH 18/33] build: Added pyspelling pre-commit check --- .pre-commit-config.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a60747a..fe0cd2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.5.4 + rev: 5.10.1 hooks: - id: isort args: ["--profile", "black", "--line-length=79"] - repo: https://github.com/psf/black - rev: stable + rev: 22.6.0 hooks: - id: black - repo: https://github.com/pre-commit/pre-commit-hooks @@ -13,3 +13,7 @@ repos: hooks: - id: flake8 args: ['--ignore=E203,W503'] # see https://github.com/psf/black/issues/315 https://github.com/psf/black/issues/52 +- repo: https://github.com/getindata/py-pre-commit-hooks + rev: v0.1.3 + hooks: + - id: pyspelling-docker From 2a40cb8a6f77e639ae29da056ee73dbdc8b047e7 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 28 Jul 2022 18:26:18 +0200 Subject: [PATCH 19/33] fixup! 
docs: Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7519a..9bc5ab4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Added pre-commit hook for pyspelling check - Changed sphinx markdown engine to myst - Added CI for spellchecking the documentation with configuration for myst - Updated documentation quickstart to workaround known issues and make it work on local kind cluster From 3dbc58d7bc2d9d7f8ba9baac239389ffa0c7dc25 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 14:44:28 +0200 Subject: [PATCH 20/33] docs: Fix warning --- docs/source/04_contributing/01_guideline.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md index a1355ea..60878f7 100644 --- a/docs/source/04_contributing/01_guideline.md +++ b/docs/source/04_contributing/01_guideline.md @@ -7,11 +7,12 @@ There are few things that you should know about our workflow: - On release versions from develop branch are tagged and pulled to the master branch. - For commits we follow [angular commit messages guideline](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit). +(updating-the-documentation)= ## Updating the documentation For documentation updates we have `docs/Makefile` that runs `Sphinx` to update the `readthedocs`. -There is automation using github actions that checks the spelling of the docs. [Links](), `inline blocks escaped with back ticks` \`...\` and +There is automation using github actions that checks the spelling of the docs. 
[Links](updating-the-documentation), `inline blocks escaped with back ticks` \`...\` and ``` triple backtick blocks ``` From 6ce03aec1434819143b430b9135e7aba04b5a848 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 15:54:32 +0200 Subject: [PATCH 21/33] feat: Jinja templating/substitutions enabled generated variables It enables the feature of using variables in documentation. Resolves #136 --- docs/conf.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 75ee2e3..2733aae 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,6 +15,9 @@ # sys.path.insert(0, os.path.abspath('.')) import re +from pip._vendor import pkg_resources + +from kedro_kubeflow import __name__ as _package_name from kedro_kubeflow import version as release # -- Project information ----------------------------------------------------- @@ -23,9 +26,60 @@ copyright = "2020, GetInData" author = "GetInData" +myst_substitutions = { + "tested_kedro": "0.17.7", + "release": release, + "python_build_version": "", +} + # The full version, including alpha/beta/rc tags version = re.match(r"^([0-9]+\.[0-9]+).*", release).group(1) - +_package_name = _package_name.replace("_", "-") +_package = pkg_resources.working_set.by_key[_package_name] + +# Extending keys for subsitutions with versions of package +myst_substitutions.update( + {"req_" + p.name: str(p) for p in _package.requires()} +) +myst_substitutions.update( + { + "req_build_" + p.name: pkg_resources.get_distribution(p).version + for p in _package.requires() + } +) +myst_substitutions.update( + { + "req_upper_" + + p.name: "".join( + [ + "".join(i) + for i in filter( + lambda x: x[0] in ["<", "<=", "~=", "==", "==="], p.specs + ) + ] + ) + for p in _package.requires() + } +) +myst_substitutions.update( + { + "req_lower_" + + p.name: "".join( + [ + "".join(i) + for i in filter( + lambda x: x[0] in [">", ">=", "~=", "==", "==="], 
p.specs + ) + ] + ) + for p in _package.requires() + } +) + + +myst_substitutions.update( + {"req_build" + p.name: str(p) for p in _package.requires()} +) # -- General configuration --------------------------------------------------- @@ -46,6 +100,11 @@ "myst_parser", "sphinx_rtd_theme", ] +myst_enable_extensions = [ + "replacements", + "strikethrough", + "substitution", +] # Add any paths that contain templates here, relative to this directory. From 404f6e98b234cf397eda0e275bc77a5d3e295e0d Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 16:17:03 +0200 Subject: [PATCH 22/33] docs: Merged CONTRIBUTIONS.md with 01_contributing Spellcheck now also enabled for README and CONTRIBUTING. Fixed spelling mistakes --- .spellcheck.yml | 4 +- CONTRIBUTING.md | 50 ++++++++++++++++++--- docs/source/04_contributing/01_guideline.md | 37 +-------------- docs/spellcheck_exceptions.txt | 8 ++++ 4 files changed, 57 insertions(+), 42 deletions(-) mode change 100644 => 120000 docs/source/04_contributing/01_guideline.md diff --git a/.spellcheck.yml b/.spellcheck.yml index 8cce72d..192a610 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -4,7 +4,9 @@ matrix: aspell: lang: en sources: - - docs/source/*/*.md + - "docs/**.md" + - "CONTRIBUTING.md" + - "README.md" default_encoding: utf-8 pipeline: - pyspelling.filters.context: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 22e88d6..4e94e51 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,23 +1,63 @@ +# Contributing guidelines -## PR Guidelines +Everyone is welcome to contribute to the code of this plugin, however we have some automation and guidelines you should get familiar with first. + +### PR Guidelines 1. Fork branch from `develop`. 1. Ensure to provide unit tests for new functionality. 1. Install dev requirements: `pip install -r requirements-dev.txt` and setup a hook: `pre-commit install` +1. 
For commits we follow [angular commit messages guideline](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit). 1. Update documentation accordingly. -1. Update [changelog](CHANGELOG.md) according to ["Keep a changelog"](https://keepachangelog.com/en/1.0.0/) guidelines. +1. Update [changelog](https://github.com/getindata/kedro-kubeflow/blob/develop/CHANGELOG.md) according to ["Keep a changelog"](https://keepachangelog.com/en/1.0.0/) guidelines. 1. Squash changes with a single commit as much as possible and ensure verbose PR name. 1. Open a PR against `develop` *We reserve the right to take over and modify or abandon PRs that do not match the workflow or are abandoned.* -## Release workflow +(updating-the-documentation)= +### Updating the documentation + +For documentation updates we have `docs/Makefile` that runs `Sphinx` to update the [readthedocs](https://kedro-kubeflow.readthedocs.io). + +There is automation using github actions that checks the spelling of the docs. [Links](updating-the-documentation), `inline blocks escaped with back ticks` \`...\` and +``` +triple backtick blocks +``` +are omitted. + + In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. + +For documentation syntax, we use Markdown with [myst](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) parser. + +##### Templating + +We have `jinja` available to be used in documentation. The variables are defined in `docs/conf.py` in `myst_substitutions`. 
By default the following are available: + + - `release` - the package version with which it was built + - `req_` - the specification of version package requirement range in `setup.py` + - `req_lower_` - the specification of version package requirement lower bound in `setup.py` + - `req_upper_` - the specification of version package requirement upper bound in `setup.py` + - `req_build_` - the specification of version package with which it was built + +### Pre-commit and github hooks + +For linting and keeping code clean we use pre-commit package to join with github hooks. Use it by doing: + +```console +$ pip install pre-commit +$ pre-commit install +``` + +You can test github actions locally with [act](https://github.com/nektos/act). + +### Release workflow 1. Create the release candidate: - Go to the [Prepare release](https://github.com/getindata/kedro-kubeflow/actions?query=workflow%3A%22Prepare+release%22) action. - Click "Run workflow" - Enter the part of the version to bump (one of `..`). Minor (x.**x**.x) is a default. -2. If the workflow has run sucessfully: - - Go to the newly openened PR named `Release candidate `` +2. If the workflow has run successfully: + - Go to the newly opened PR named `Release candidate ` - Check that changelog and version have been properly updated. If not pull the branch and apply manual changes if necessary. - Merge the PR to master 3. Checkout the [Publish](https://github.com/getindata/kedro-kubeflow/actions?query=workflow%3APublish) workflow to see if: diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md deleted file mode 100644 index 60878f7..0000000 --- a/docs/source/04_contributing/01_guideline.md +++ /dev/null @@ -1,36 +0,0 @@ -# Contributing guidelines - -Everyone is welcome to contribute to the code of this plugin, however we have some automation and guidelines you should get familiar with first. 
- -There are few things that you should know about our workflow: -- All changes should be made as pull requests to develop branch. -- On release versions from develop branch are tagged and pulled to the master branch. -- For commits we follow [angular commit messages guideline](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit). - -(updating-the-documentation)= -## Updating the documentation - -For documentation updates we have `docs/Makefile` that runs `Sphinx` to update the `readthedocs`. - -There is automation using github actions that checks the spelling of the docs. [Links](updating-the-documentation), `inline blocks escaped with back ticks` \`...\` and -``` -triple backtick blocks -``` -are omitted. - - In order to add new spelling exceptions, append them to the `docs/spellcheck_exceptions.txt` file. - -For documentation syntax, we use Markdown with [myst](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html) parser. - -## Github hooks - -For linting and keeping code clean we use pre-commit package to join with github hooks. 
Use it by doing: - -```console -$ pip install pre-commit -$ pre-commit install -``` - -## Releasing new versions - -TBD diff --git a/docs/source/04_contributing/01_guideline.md b/docs/source/04_contributing/01_guideline.md new file mode 120000 index 0000000..c97564d --- /dev/null +++ b/docs/source/04_contributing/01_guideline.md @@ -0,0 +1 @@ +../../../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/spellcheck_exceptions.txt b/docs/spellcheck_exceptions.txt index eb57d57..04f03ce 100644 --- a/docs/spellcheck_exceptions.txt +++ b/docs/spellcheck_exceptions.txt @@ -25,13 +25,17 @@ OpenID Pipelnes Posix PowerShellActivator +PR +PRs Preprequisites PythonActivator +PyPI Quickstart ReadWriteMany ReadWriteOnce SDK TBD +Templating UI VM XonshActivator @@ -42,6 +46,7 @@ behaviour cls conf config +changelog cpu cron datasets @@ -52,10 +57,12 @@ faac getindata github gpu +html http https init io +jinja kedro kfp kfpclient @@ -78,6 +85,7 @@ shm src storageclass templated +templating tensorflow ttl txt From 8affce41cd9e077e882c71af92eb7ab049087a09 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 17:27:47 +0200 Subject: [PATCH 23/33] docs: Removed outdated info and applied templates of versioning For code fences a workaround needs to be done, as jinja is ignored inside code fences --- docs/source/02_installation/01_installation.md | 8 ++------ docs/source/03_getting_started/01_quickstart.md | 11 ++++------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/docs/source/02_installation/01_installation.md b/docs/source/02_installation/01_installation.md index 6e57ada..1dcde4b 100644 --- a/docs/source/02_installation/01_installation.md +++ b/docs/source/02_installation/01_installation.md @@ -2,13 +2,9 @@ ## Kedro setup -First, you need to install base Kedro package in ``<17.0`` version +First, you need to install base Kedro package in {{req_kedro}} version -> Kedro 17.0 is supported by kedro-kubeflow, but [not by 
kedro-mlflow](https://github.com/Galileo-Galilei/kedro-mlflow/issues/144) yet, so the latest version from 0.16 family is recommended. - -```console -$ pip install 'kedro<0.17' -``` +{{"```console\n$ pip install '{kedro}'\n```".format(kedro=req_kedro)}} ## Plugin installation diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index 0d5cff8..d76b358 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -25,15 +25,12 @@ $ source venv-demo/bin/activate Then, `kedro` must be present to enable cloning the starter project, along with the latest version of `kedro-kubeflow` plugin and `kedro-docker` (required to build docker images with the Kedro pipeline nodes): -```console -$ pip install 'kedro<0.18' kedro-kubeflow kedro-docker -``` +{{"```console\n $ pip install '{req_kedro}' kedro-kubeflow kedro-docker\n```".format(req_kedro=req_kedro)}} -With the dependencies in place, let's create a new project (with the latest supported kedro version - 0.17.7): - -```console -$ kedro new --starter=spaceflights --checkout=0.17.7 +With the dependencies in place, let's create a new project (with the latest supported kedro version - {{tested_kedro}}): +{{"```console\n$ kedro new --starter=spaceflights --checkout={version}\n".format(version=tested_kedro)}} +``` Project Name: ============= Please enter a human readable name for your new project. 
From f6f78c5ed74005337ef5e0e760493f670b5ed423 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 17:49:01 +0200 Subject: [PATCH 24/33] fix: made codeclimate happy, fixed duplicate --- docs/conf.py | 51 ++++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2733aae..8fa2752 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,6 +14,7 @@ # import sys # sys.path.insert(0, os.path.abspath('.')) import re +from pprint import pprint from pip._vendor import pkg_resources @@ -29,7 +30,6 @@ myst_substitutions = { "tested_kedro": "0.17.7", "release": release, - "python_build_version": "", } # The full version, including alpha/beta/rc tags @@ -47,39 +47,24 @@ for p in _package.requires() } ) -myst_substitutions.update( - { - "req_upper_" - + p.name: "".join( - [ - "".join(i) - for i in filter( - lambda x: x[0] in ["<", "<=", "~=", "==", "==="], p.specs - ) - ] - ) - for p in _package.requires() - } -) -myst_substitutions.update( - { - "req_lower_" - + p.name: "".join( - [ - "".join(i) - for i in filter( - lambda x: x[0] in [">", ">=", "~=", "==", "==="], p.specs - ) - ] - ) - for p in _package.requires() - } -) - -myst_substitutions.update( - {"req_build" + p.name: str(p) for p in _package.requires()} -) +conditions = { + "upper": ["<", "<=", "~=", "==", "==="], + "lower": [">", ">=", "~=", "==", "==="], +} +for k, cond in conditions.items(): + myst_substitutions.update( + { + f"req_{k}_" + + p.name: "".join( + ["".join(i) for i in filter(lambda x: x[0] in cond, p.specs)] + ) + for p in _package.requires() + } + ) + +print("Available patterns for substituion:") +pprint(myst_substitutions) # -- General configuration --------------------------------------------------- From 744340acd359703ae6533417dcc0d7793493d8a6 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 18:01:43 +0200 Subject: [PATCH 25/33] docs: Changelog update --- CHANGELOG.md | 1 + 
1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bc5ab4..482cb28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## [Unreleased] +- Added templating capabilities to docs generator and used them in the docs for kedro versioning - Added pre-commit hook for pyspelling check - Changed sphinx markdown engine to myst - Added CI for spellchecking the documentation with configuration for myst From 53c79861b339609ea921e5f3f0cfe8c3f8bef225 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Fri, 29 Jul 2022 18:06:35 +0200 Subject: [PATCH 26/33] docs: Updated guide for templating in contributing --- CONTRIBUTING.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4e94e51..f5c6599 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,6 +39,12 @@ We have `jinja` available to be used in documentation. The variables are defined - `req_upper_` - the specification of version package requirement upper bound in `setup.py` - `req_build_` - the specification of version package with which it was built +The templates do not work inside code fences or links. You can workaround that by putting whole code fence inside `jinja` and then use the formatting inside. See the [substitution documentation](https://myst-parser.readthedocs.io/en/latest/syntax/optional.html#substitutions-with-jinja2) for details. + +```{warning} +The substitutions do not seem to be fully `jinja2` capable. While expressions `{{ }}` do work, the control statements `{% %}` do not. +``` + ### Pre-commit and github hooks For linting and keeping code clean we use pre-commit package to join with github hooks. Use it by doing: From 57d5799e78135160315726662bac56b1ce554588 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 14:41:29 +0200 Subject: [PATCH 27/33] feat: Extended the available templates for package extras versions This was overlooked in previous solution. 
--- docs/conf.py | 66 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8fa2752..4fe707b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -37,31 +37,53 @@ _package_name = _package_name.replace("_", "-") _package = pkg_resources.working_set.by_key[_package_name] + # Extending keys for subsitutions with versions of package -myst_substitutions.update( - {"req_" + p.name: str(p) for p in _package.requires()} -) -myst_substitutions.update( - { - "req_build_" + p.name: pkg_resources.get_distribution(p).version - for p in _package.requires() +def update_templates_with_requirements(packages_set, label): + """Local function for updating template labels with requirements""" + myst_substitutions.update({label + p.name: str(p) for p in packages_set}) + + built_packages = {} + for p in packages_set: + try: + req_label = label + "build_" + p.name + built_packages[req_label] = pkg_resources.get_distribution( + p + ).version + except pkg_resources.DistributionNotFound: + pass + myst_substitutions.update(built_packages) + + conditions = { + "upper": ["<", "<=", "~=", "==", "==="], + "lower": [">", ">=", "~=", "==", "==="], } -) - -conditions = { - "upper": ["<", "<=", "~=", "==", "==="], - "lower": [">", ">=", "~=", "==", "==="], + for k, cond in conditions.items(): + myst_substitutions.update( + { + label + + k + + "_" + + p.name: "".join( + [ + "".join(i) + for i in filter(lambda x: x[0] in cond, p.specs) + ] + ) + for p in packages_set + } + ) + + +base_requirements = set(_package.requires()) +extra_requires = { + extra: set(_package.requires(extras=(extra,))) - base_requirements + for extra in _package.extras } -for k, cond in conditions.items(): - myst_substitutions.update( - { - f"req_{k}_" - + p.name: "".join( - ["".join(i) for i in filter(lambda x: x[0] in cond, p.specs)] - ) - for p in _package.requires() - } - ) +update_templates_with_requirements(base_requirements, "req_") 
+for extra, reqs in extra_requires.items(): + update_templates_with_requirements(reqs, f"req_{extra}_") + print("Available patterns for substituion:") pprint(myst_substitutions) From 34e588f0c99444b18c0eea0d6c879b8630f9d5d1 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 14:47:48 +0200 Subject: [PATCH 28/33] fix: Allowed for newer versions of sphinx, as rtd failed to build --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6ccf90a..b7b5c9f 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "responses>=0.13.4", ], "docs": [ - "sphinx==5.0.2", + "sphinx>=5.0.2", "sphinx_rtd_theme==1.0.0", "myst-parser==0.18.0", ], From 0eb1d819d03009a7cbc9d80b560bc08dc2d2d62d Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 15:03:24 +0200 Subject: [PATCH 29/33] fixup! fix: Allowed for newer versions of sphinx, as rtd failed to build --- docs/requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index beefe52..d636dcf 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx +sphinx~=5.0.2 sphinx_rtd_theme recommonmark myst-parser diff --git a/setup.py b/setup.py index b7b5c9f..e692f7c 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ "responses>=0.13.4", ], "docs": [ - "sphinx>=5.0.2", + "sphinx~=5.0.2", "sphinx_rtd_theme==1.0.0", "myst-parser==0.18.0", ], From 360e080aace3151c0b6f0e10885ea0e3f8a82159 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 15:13:51 +0200 Subject: [PATCH 30/33] build: Configure spellcheck to be case insensitive Also fixed issue with wildcard path finding --- .spellcheck.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.spellcheck.yml b/.spellcheck.yml index 192a610..6bfc9df 100644 --- a/.spellcheck.yml +++ b/.spellcheck.yml @@ -2,9 +2,10 @@ spellchecker: aspell matrix: - name: Check 
the english docs aspell: + ignore-case: "" lang: en sources: - - "docs/**.md" + - "docs/**/*.md" - "CONTRIBUTING.md" - "README.md" default_encoding: utf-8 @@ -27,6 +28,8 @@ matrix: close: '\]' - open: '\(' close: '\)' + - open: '\{' + close: '\}' dictionary: wordlists: - docs/spellcheck_exceptions.txt From ae332138ae551ebc04c799e6c9bbf64b4868e7f3 Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 15:27:43 +0200 Subject: [PATCH 31/33] docs: Fixed a typo --- docs/source/03_getting_started/05_authentication.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/03_getting_started/05_authentication.md b/docs/source/03_getting_started/05_authentication.md index adce5cc..5da7dbc 100644 --- a/docs/source/03_getting_started/05_authentication.md +++ b/docs/source/03_getting_started/05_authentication.md @@ -12,7 +12,7 @@ Dex is the recommended authentication mechanism for on-premise Kubeflow clusters * [oidc-autheservice](https://github.com/arrikto/oidc-authservice) redirect unauthenticated users to Dex, * [Dex](https://github.com/dexidp/dex) authenticates user in remote system, like LDAP or OpenID and also acts as OpenID provider, -* `oidc-autheservice` asks Dex for token and creates the session used across entire Kubeflow. +* `oidc-autheservice` asks Dex for a token and creates the session used across entire Kubeflow. 
In order to use `kedro-kubeflow` behind Dex-secured clusters, use the following manual: From dbcd2f569188403f265e09687b6d46e716f46d0a Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Wed, 3 Aug 2022 15:36:20 +0200 Subject: [PATCH 32/33] docs: Added hints for tox --- docs/source/04_contributing/02_local_testing.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/04_contributing/02_local_testing.md b/docs/source/04_contributing/02_local_testing.md index 8ee49be..0596636 100644 --- a/docs/source/04_contributing/02_local_testing.md +++ b/docs/source/04_contributing/02_local_testing.md @@ -3,14 +3,16 @@ ## Unit tests -The plugin has unit tests that can be run with `tox`: +The plugin has unit tests that can be run with `tox`, for example for python 3.8: ```console $ pip install tox-pip-version $ tox -v -e py38 ``` +List available python environments to test with using `tox -l`. + You can also run them manually by executing `python -m unittest` in the root folder. They are also executed with github action on pull requests to test the stability of new changes. See `.github/workflows/python-package.yml`. ## E2E tests -There is also a set up with Kubeflow running on team-maintained Google Cloud Platform. It tests the execution on said Kubeflow platform with `spaceflight` kedro starter. They are also automated with github action. See `.github/workflows/e2e-tests.yml`. \ No newline at end of file +There is also a set up with Kubeflow running on team-maintained Google Cloud Platform. It tests the execution on said Kubeflow platform with `spaceflight` kedro starter. They are also automated with github action. See `.github/workflows/e2e-tests.yml`. 
From 1eb83d01c0f1a9afd98375230eccb4def8d60fad Mon Sep 17 00:00:00 2001 From: Artur Dobrogowski Date: Thu, 4 Aug 2022 10:05:12 +0200 Subject: [PATCH 33/33] docs: Removed outdated hint about resources --- docs/source/03_getting_started/01_quickstart.md | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/docs/source/03_getting_started/01_quickstart.md b/docs/source/03_getting_started/01_quickstart.md index d76b358..71730fc 100644 --- a/docs/source/03_getting_started/01_quickstart.md +++ b/docs/source/03_getting_started/01_quickstart.md @@ -267,21 +267,6 @@ Then, if needed, adjust the `conf/base/kubeflow.yaml`. For example, the `image:` In this example you also need to update the tag of the `image:` part to also use `demo` instead latest. ```` -````{warning} -The default limits of resources for node execution are too small for this example. Increase the following: -```yaml - resources: - (...) - __default__: - cpu: 200m - memory: 64Mi -``` -like that -```yaml - cpu: 1 - memory: 1Gi -``` -```` Finally, everything is set to run the pipeline on Kubeflow. Run `upload-pipeline`: