Merge pull request #128 from getindata/release-0.6.4

Release 0.6.4
getindata · Jun 1, 2022 · ebb9452 · ebb9452
2 parents 5e57a04 + 7b6bf3d
commit ebb9452
Show file tree

Hide file tree

Showing 9 changed files with 125 additions and 5 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+## [0.6.4] - 2022-06-01
+
+-   Added support for specifying tolerations
+
 ## [0.6.3] - 2022-05-10
 
 -   KFP SDK version bumped to 1.8.11 in order to fix misbehaving TTL issue
@@ -133,7 +137,9 @@
 -   Method to schedule runs for most recent version of given pipeline `kedro kubeflow schedule` 
 -   Shortcut to open UI for pipelines using `kedro kubeflow ui` 
 
-[Unreleased]: https://github.com/getindata/kedro-kubeflow/compare/0.6.3...HEAD
+[Unreleased]: https://github.com/getindata/kedro-kubeflow/compare/0.6.4...HEAD
+
+[0.6.4]: https://github.com/getindata/kedro-kubeflow/compare/0.6.3...0.6.4
 
 [0.6.3]: https://github.com/getindata/kedro-kubeflow/compare/0.6.2...0.6.3
 

diff --git a/docs/source/01_introduction/01_intro.md b/docs/source/01_introduction/01_intro.md
@@ -9,7 +9,7 @@ like pod or volume) and edges (dependencies between the nodes, like passing outp
 data as input). The pipelines are stored in the versioned database, allowing user 
 to run the pipeline once or schedule the recurring run.
 
-## Why to integrate Kedro project with Pipelines?
+## Why integrate a Kedro project with Pipelines?
 
 Kubeflow Pipelines' main advantage is portability. Once you define a pipeline,
 it can be started on any Kubernetes cluster. The code to execute is stored inside 

diff --git a/docs/source/02_installation/02_configuration.md b/docs/source/02_installation/02_configuration.md
@@ -90,6 +90,19 @@ run_config:
     # Flag indicating if volume for inter-node data exchange should be
     # kept after the pipeline is deleted
     keep: False
+
+  # Optional section allowing adjustment of the tolerations for the nodes
+  tolerations:
+    __default__:
+    - key: "dedicated"
+      operator: "Equal"
+      value: "ml-ops"
+      effect: "NoSchedule"
+    node_a:
+    - key: "dedicated"
+      operator: "Equal"
+      value: "gpu_workload"
+      effect: "NoSchedule"
 
   # Optional section allowing adjustment of the resources
   # reservations and limits for the nodes

diff --git a/kedro_kubeflow/__init__.py b/kedro_kubeflow/__init__.py
@@ -1,3 +1,3 @@
 """kedro_kubeflow."""
 
-version = "0.6.3"
+version = "0.6.4"
diff --git a/kedro_kubeflow/config.py b/kedro_kubeflow/config.py
@@ -128,6 +128,21 @@
       num_retries: 4
       backoff_duration: 60s
       backoff_factor: 2
+  # Optional section allowing adjustment of the tolerations for the nodes.
+  # Tolerations are specified per node.
+  # The __default__ section will be loaded if nothing is specified for a
+  # particular node.
+  tolerations:
+    __default__:
+    - key: "dedicated"
+      operator: "Equal"
+      value: "ml-ops"
+      effect: "NoSchedule"
+    node_a:
+    - key: "gpu_resource"
+      operator: "Equal"
+      value: "voltaire"
+      effect: "NoSchedule"
 """
 
 
@@ -192,6 +207,17 @@ def get_for(self, node_name):
         return {**defaults, **node_specific}
 
 
+class Tolerations(Config):
+    def is_set_for(self, node_name):
+        return bool(self.get_for(node_name))
+
+    def get_for(self, node_name):
+        node_values = self._get_or_default(node_name, [])
+        if node_values:
+            return node_values
+        return self._get_or_default("__default__", [])
+
+
 class RetryPolicy(Config):
     def is_set_for(self, node_name):
         return self.get_for(node_name) != {}
@@ -251,6 +277,10 @@ def description(self):
     def resources(self):
         return NodeResources(self._get_or_default("resources", {}))
 
+    @property
+    def tolerations(self):
+        return Tolerations(self._get_or_default("tolerations", {}))
+
     @property
     def retry_policy(self):
         return RetryPolicy(self._get_or_default("retry_policy", {}))

diff --git a/kedro_kubeflow/generators/utils.py b/kedro_kubeflow/generators/utils.py
@@ -137,4 +137,7 @@ def customize_op(op, image_pull_policy, run_config):
         op.set_retry(
             policy="Always", **run_config.retry_policy.get_for(op.name)
         )
+    if run_config.tolerations.is_set_for(op.name):
+        for toleration in run_config.tolerations.get_for(op.name):
+            op.add_toleration(k8s.V1Toleration(**toleration))
     return op
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.6.3
+current_version = 0.6.4
 
 [bumpversion:file:setup.py]
 

diff --git a/setup.py b/setup.py
@@ -34,7 +34,7 @@
 
 setup(
     name="kedro-kubeflow",
-    version="0.6.3",
+    version="0.6.4",
     description="Kedro plugin with Kubeflow support",
     long_description=README,
     long_description_content_type="text/markdown",

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -95,6 +95,74 @@ def test_resources_default_and_node_specific(self):
             "memory": "64Mi",
         }
 
+    def test_tolerations_default_only(self):
+        toleration_config = [
+            {
+                "key": "thekey",
+                "operator": "equal",
+                "value": "thevalue",
+                "effect": "NoSchedule",
+            }
+        ]
+        cfg = PluginConfig(
+            {"run_config": {"tolerations": {"__default__": toleration_config}}}
+        )
+        assert cfg.run_config.tolerations.is_set_for("node2")
+        assert cfg.run_config.tolerations.get_for("node2") == toleration_config
+        assert cfg.run_config.tolerations.is_set_for("node3")
+        assert cfg.run_config.tolerations.get_for("node3") == toleration_config
+
+    def test_tolerations_no_default(self):
+        toleration_config = [
+            {
+                "key": "thekey",
+                "operator": "equal",
+                "value": "thevalue",
+                "effect": "NoSchedule",
+            }
+        ]
+        cfg = PluginConfig(
+            {"run_config": {"tolerations": {"node2": toleration_config}}}
+        )
+        assert cfg.run_config.tolerations.is_set_for("node2")
+        assert cfg.run_config.tolerations.get_for("node2") == toleration_config
+        assert cfg.run_config.tolerations.is_set_for("node3") is False
+
+    def test_tolerations_default_and_node_specific(self):
+        toleration_config = [
+            {
+                "key": "thekey",
+                "operator": "equal",
+                "value": "thevalue",
+                "effect": "NoSchedule",
+            }
+        ]
+        default_toleration_config = [
+            {
+                "key": "thekeyfordefault",
+                "operator": "equal",
+                "value": "thevaluefordefault",
+                "effect": "NoSchedule",
+            }
+        ]
+        cfg = PluginConfig(
+            {
+                "run_config": {
+                    "tolerations": {
+                        "__default__": default_toleration_config,
+                        "node2": toleration_config,
+                    }
+                }
+            }
+        )
+        assert cfg.run_config.tolerations.is_set_for("node2")
+        assert cfg.run_config.tolerations.get_for("node2") == toleration_config
+        assert cfg.run_config.tolerations.is_set_for("node3")
+        assert (
+            cfg.run_config.tolerations.get_for("node3")
+            == default_toleration_config
+        )
+
     def test_do_not_keep_volume_by_default(self):
         cfg = PluginConfig({"run_config": {"volume": {}}})
         assert cfg.run_config.volume.keep is False