PAASTA-18198: add pool_node_affinities to improve Karpenter AZ balance #3849

Merged · 13 commits · May 2, 2024
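For reviewers new to the feature: pool_node_affinities maps a PaaSTA pool name to extra node selectors that are applied to every Pod scheduled into that pool. A minimal sketch of the shape this PR consumes, with an illustrative pool name and zones borrowed from the tests below:

    pool_node_affinities = {
        "default": {
            "topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"],
        },
    }

Service-level node_selectors that already pin a zone-like label win over the pool-wide setting; see the ZONE_LABELS check below.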
33 changes: 30 additions & 3 deletions paasta_tools/kubernetes_tools.py
@@ -198,6 +198,13 @@
     "hostname",
     "owner",
 }
+ZONE_LABELS = (
+    "topology.kubernetes.io/zone",
+    "yelp.com/habitat",
+    "yelp.com/eni_config",
+    "karpenter.sh/nodepool",
+    "topology.ebs.csi.aws.com/zone",
+)
 
 GPU_RESOURCE_NAME = "nvidia.com/gpu"
 DEFAULT_STORAGE_CLASS_NAME = "ebs"
@@ -681,6 +688,10 @@ def registration_label(namespace: str) -> str:
     return f"registrations.{PAASTA_ATTRIBUTE_PREFIX}{limited_namespace}"
 
 
+def contains_zone_label(node_selectors: Dict[str, NodeSelectorConfig]) -> bool:
+    return any(k in node_selectors for k in ZONE_LABELS)
+
+
 class KubernetesDeploymentConfig(LongRunningServiceConfig):
     config_dict: KubernetesDeploymentConfigDict
 
@@ -2139,7 +2150,9 @@ def get_pod_template_spec(
         )
         # need to check if there are node selectors/affinities. if there are none
         # and we create an empty affinity object, k8s will deselect all nodes.
-        node_affinity = self.get_node_affinity()
+        node_affinity = self.get_node_affinity(
+            system_paasta_config.get_pool_node_affinities()
+        )
         if node_affinity is not None:
             pod_spec_kwargs["affinity"] = V1Affinity(node_affinity=node_affinity)
 
@@ -2283,7 +2296,9 @@ def get_node_selector(self) -> Mapping[str, str]:
         node_selectors["yelp.com/pool"] = self.get_pool()
         return node_selectors
 
-    def get_node_affinity(self) -> Optional[V1NodeAffinity]:
+    def get_node_affinity(
+        self, pool_node_affinities: Dict[str, Dict[str, List[str]]] = None
+    ) -> Optional[V1NodeAffinity]:
         """Converts deploy_whitelist and deploy_blacklist into node affinities.
 
         note: At the time of writing, `kubectl describe` does not show affinities,
@@ -2293,12 +2308,24 @@ def get_node_affinity(self) -> Optional[V1NodeAffinity]:
             allowlist=self.get_deploy_whitelist(),
             denylist=self.get_deploy_blacklist(),
         )
+        node_selectors = self.config_dict.get("node_selectors", {})
         requirements.extend(
             raw_selectors_to_requirements(
-                raw_selectors=self.config_dict.get("node_selectors", {}),
+                raw_selectors=node_selectors,
             )
         )
 
+        # PAASTA-18198: To improve AZ balance with Karpenter, we temporarily allow specifying zone affinities per pool
+        if pool_node_affinities and self.get_pool() in pool_node_affinities:
+            current_pool_node_affinities = pool_node_affinities[self.get_pool()]
+            # If the service already has a node selector for a zone, we don't want to override it
+            if current_pool_node_affinities and not contains_zone_label(node_selectors):
+                requirements.extend(
+                    raw_selectors_to_requirements(
+                        raw_selectors=current_pool_node_affinities,
+                    )
+                )
+
         preferred_terms = []
         for node_selectors_prefered_config_dict in self.config_dict.get(
             "node_selectors_preferred", []
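To make the precedence rule easier to review, here is a simplified, standalone restatement of the logic in get_node_affinity. This is not the PR's code: it returns plain dicts instead of building V1NodeSelectorRequirement objects via raw_selectors_to_requirements, and effective_selectors is a hypothetical name used only for this sketch.

    # The merge rule introduced in get_node_affinity, restated as a
    # standalone sketch that returns plain dicts.
    ZONE_LABELS = (
        "topology.kubernetes.io/zone",
        "yelp.com/habitat",
        "yelp.com/eni_config",
        "karpenter.sh/nodepool",
        "topology.ebs.csi.aws.com/zone",
    )

    def effective_selectors(node_selectors, pool, pool_node_affinities):
        # Service-level selectors always apply.
        merged = dict(node_selectors)
        pool_affinities = (pool_node_affinities or {}).get(pool)
        # Pool-wide zone affinities apply only when the service has not
        # already pinned itself to a zone-like label (contains_zone_label).
        if pool_affinities and not any(k in node_selectors for k in ZONE_LABELS):
            merged.update(pool_affinities)
        return merged

    # A service that already selects a habitat keeps only its own selector:
    assert effective_selectors(
        {"yelp.com/habitat": ["uswest1astagef"]},
        "default",
        {"default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}},
    ) == {"yelp.com/habitat": ["uswest1astagef"]}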
5 changes: 5 additions & 0 deletions paasta_tools/utils.py
@@ -2020,6 +2020,7 @@ class SystemPaastaConfigDict(TypedDict, total=False):
     pdb_max_unavailable: Union[str, int]
     pki_backend: str
     pod_defaults: Dict[str, Any]
+    pool_node_affinities: Dict[str, Dict[str, List[str]]]
     topology_spread_constraints: List[TopologySpreadConstraintDict]
     previous_marathon_servers: List[MarathonConfigDict]
     readiness_check_prefix_template: List[str]
@@ -2645,6 +2646,10 @@ def get_taskproc(self) -> Dict:
     def get_disabled_watchers(self) -> List:
         return self.config_dict.get("disabled_watchers", [])
 
+    def get_pool_node_affinities(self) -> Dict[str, Dict[str, List[str]]]:
+        """Node selectors that will be applied to all Pods in a pool"""
+        return self.config_dict.get("pool_node_affinities", {})
+
     def get_topology_spread_constraints(self) -> List[TopologySpreadConstraintDict]:
         """List of TopologySpreadConstraints that will be applied to all Pods in the cluster"""
         return self.config_dict.get("topology_spread_constraints", [])
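A quick sketch of how the new getter surfaces the setting, assuming the SystemPaastaConfig(config_dict, directory) constructor used elsewhere in this module; the directory argument and values here are illustrative:

    from paasta_tools.utils import SystemPaastaConfig

    config = SystemPaastaConfig(
        {
            "pool_node_affinities": {
                "default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}
            }
        },
        "/etc/paasta",  # illustrative config directory
    )
    assert config.get_pool_node_affinities() == {
        "default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}
    }

An unset key simply yields an empty dict, so clusters without the setting are unaffected.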
112 changes: 112 additions & 0 deletions tests/test_kubernetes_tools.py
@@ -2029,6 +2029,118 @@ def test_get_node_affinity_with_preferences(self):
             ],
         )
 
+    def test_get_node_affinity_no_reqs_with_global_override(self):
+        """
+        Given global node affinity overrides and no deployment specific requirements, the globals should be used
+        """
+        assert self.deployment.get_node_affinity(
+            {"default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}},
+        ) == V1NodeAffinity(
+            required_during_scheduling_ignored_during_execution=V1NodeSelector(
+                node_selector_terms=[
+                    V1NodeSelectorTerm(
+                        match_expressions=[
+                            V1NodeSelectorRequirement(
+                                key="topology.kubernetes.io/zone",
+                                operator="In",
+                                values=["us-west-1a", "us-west-1b"],
+                            )
+                        ]
+                    )
+                ],
+            ),
+        )
+
+    def test_get_node_affinity_no_reqs_with_global_override_and_deployment_config(self):
+        """
+        Given global node affinity overrides and deployment specific requirements, globals should be ignored
+        """
+        deployment = KubernetesDeploymentConfig(
+            service="kurupt",
+            instance="fm",
+            cluster="brentford",
+            config_dict={
+                "node_selectors": {"topology.kubernetes.io/zone": ["us-west-1a"]},
+                "node_selectors_preferred": [
+                    {
+                        "weight": 1,
+                        "preferences": {
+                            "instance_type": ["a1.1xlarge"],
+                        },
+                    }
+                ],
+            },
+            branch_dict=None,
+            soa_dir="/nail/blah",
+        )
+        actual = deployment.get_node_affinity(
+            {"default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}},
+        )
+        expected = V1NodeAffinity(
+            required_during_scheduling_ignored_during_execution=V1NodeSelector(
+                node_selector_terms=[
+                    V1NodeSelectorTerm(
+                        match_expressions=[
+                            V1NodeSelectorRequirement(
+                                key="topology.kubernetes.io/zone",
+                                operator="In",
+                                values=["us-west-1a"],
+                            ),
+                        ]
+                    )
+                ],
+            ),
+            preferred_during_scheduling_ignored_during_execution=[
+                V1PreferredSchedulingTerm(
+                    weight=1,
+                    preference=V1NodeSelectorTerm(
+                        match_expressions=[
+                            V1NodeSelectorRequirement(
+                                key="node.kubernetes.io/instance-type",
+                                operator="In",
+                                values=["a1.1xlarge"],
+                            ),
+                        ]
+                    ),
+                )
+            ],
+        )
+        assert actual == expected
+
+    def test_get_node_affinity_no_reqs_with_global_override_and_deployment_config_habitat(
+        self,
+    ):
+        """
+        Given global node affinity overrides and deployment specific zone selector, globals should be ignored
+        """
+        deployment = KubernetesDeploymentConfig(
+            service="kurupt",
+            instance="fm",
+            cluster="brentford",
+            config_dict={"node_selectors": {"yelp.com/habitat": ["uswest1astagef"]}},
+            branch_dict=None,
+            soa_dir="/nail/blah",
+        )
+        actual = deployment.get_node_affinity(
+            {"default": {"topology.kubernetes.io/zone": ["us-west-1a", "us-west-1b"]}},
+        )
+        expected = V1NodeAffinity(
+            required_during_scheduling_ignored_during_execution=V1NodeSelector(
+                node_selector_terms=[
+                    V1NodeSelectorTerm(
+                        match_expressions=[
+                            V1NodeSelectorRequirement(
+                                key="yelp.com/habitat",
+                                operator="In",
+                                values=["uswest1astagef"],
+                            ),
+                        ]
+                    )
+                ],
+            )
+        )
+        assert actual == expected
+
     @pytest.mark.parametrize(
         "anti_affinity,expected",
         [