diff --git a/api/turing/cluster/knative_service.go b/api/turing/cluster/knative_service.go
index 48e900e6d..dc00550b5 100644
--- a/api/turing/cluster/knative_service.go
+++ b/api/turing/cluster/knative_service.go
@@ -50,9 +50,7 @@ type KnativeService struct {
 	TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints"`
 
 	// Resource properties
-	QueueProxyResourcePercentage          int     `json:"queueProxyResourcePercentage"`
-	UserContainerCPULimitRequestFactor    float64 `json:"userContainerLimitCPURequestFactor"`
-	UserContainerMemoryLimitRequestFactor float64 `json:"userContainerLimitMemoryRequestFactor"`
+	QueueProxyResourcePercentage int `json:"queueProxyResourcePercentage"`
 }
 
 // Creates a new config object compatible with the knative serving API, from
@@ -131,12 +129,6 @@ func (cfg *KnativeService) buildSvcSpec(
 	// Revision name
 	revisionName := getDefaultRevisionName(cfg.Name)
 
-	// Build resource requirements for the user container
-	resourceReqs := cfg.buildResourceReqs(
-		cfg.UserContainerCPULimitRequestFactor,
-		cfg.UserContainerMemoryLimitRequestFactor,
-	)
-
 	// Build container spec
 	var portName string
 	// If protocol is using GRPC, add "h2c" which is required for grpc knative
@@ -151,7 +143,7 @@ func (cfg *KnativeService) buildSvcSpec(
 				ContainerPort: cfg.ContainerPort,
 			},
 		},
-		Resources:    resourceReqs,
+		Resources:    cfg.buildResourceReqs(),
 		VolumeMounts: cfg.VolumeMounts,
 		Env:          cfg.Envs,
 	}
diff --git a/api/turing/cluster/kubernetes_service.go b/api/turing/cluster/kubernetes_service.go
index 91c281231..154b1f335 100644
--- a/api/turing/cluster/kubernetes_service.go
+++ b/api/turing/cluster/kubernetes_service.go
@@ -73,7 +73,7 @@ func (cfg *KubernetesService) buildStatefulSet(labels map[string]string) *appsv1
 					Args:           cfg.Command,
 					Ports:          cfg.buildContainerPorts(),
 					Env:            cfg.Envs,
-					Resources:      cfg.buildResourceReqs(defaultCPULimitRequestFactor, defaultMemoryLimitRequestFactor),
+					Resources:      cfg.buildResourceReqs(),
 					VolumeMounts:   cfg.VolumeMounts,
 					LivenessProbe:  cfg.buildContainerProbe(livenessProbeType, int(cfg.ProbePort)),
 					ReadinessProbe: cfg.buildContainerProbe(readinessProbeType, int(cfg.ProbePort)),
diff --git a/api/turing/cluster/models.go b/api/turing/cluster/models.go
index f046b3343..2858b7290 100644
--- a/api/turing/cluster/models.go
+++ b/api/turing/cluster/models.go
@@ -35,8 +35,10 @@ type BaseService struct {
 	Image string `json:"image"`
 
 	// Resources
-	CPURequests    resource.Quantity `json:"cpu_requests"`
-	MemoryRequests resource.Quantity `json:"memory_requests"`
+	CPURequests    resource.Quantity  `json:"cpu_requests"`
+	CPULimit       *resource.Quantity `json:"cpu_limit"`
+	MemoryRequests resource.Quantity  `json:"memory_requests"`
+	MemoryLimit    *resource.Quantity `json:"memory_limit"`
 
 	// Health Checks
 	ProbePort int32 `json:"probe_port"`
@@ -62,10 +64,7 @@ type BaseService struct {
 	InitContainers []Container `json:"init_containers"`
 }
 
-func (cfg *BaseService) buildResourceReqs(
-	UserContainerCPULimitRequestFactor float64,
-	UserContainerMemoryLimitRequestFactor float64,
-) corev1.ResourceRequirements {
+func (cfg *BaseService) buildResourceReqs() corev1.ResourceRequirements {
 	reqs := map[corev1.ResourceName]resource.Quantity{
 		corev1.ResourceCPU:    cfg.CPURequests,
 		corev1.ResourceMemory: cfg.MemoryRequests,
@@ -73,11 +72,11 @@ func (cfg *BaseService) buildResourceReqs(
-	// Set resource limits to request * userContainerCPULimitRequestFactor or UserContainerMemoryLimitRequestFactor
+	// Set resource limits only where a limit is defined; a nil limit leaves
+	// the corresponding resource unbounded
 	limits := map[corev1.ResourceName]resource.Quantity{}
-	if UserContainerCPULimitRequestFactor != 0 {
-		limits[corev1.ResourceCPU] = ComputeResource(cfg.CPURequests, UserContainerCPULimitRequestFactor)
+	if cfg.CPULimit != nil {
+		limits[corev1.ResourceCPU] = *cfg.CPULimit
 	}
-	if UserContainerMemoryLimitRequestFactor != 0 {
-		limits[corev1.ResourceMemory] = ComputeResource(cfg.MemoryRequests, UserContainerMemoryLimitRequestFactor)
+	if cfg.MemoryLimit != nil {
+		limits[corev1.ResourceMemory] = *cfg.MemoryLimit
 	}
 
 	return corev1.ResourceRequirements{
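
[Illustration, not part of the patch] With the limit-factor parameters gone,
buildResourceReqs copies any non-nil limit into Limits verbatim and otherwise
leaves that resource without a limit. A minimal sketch of the behaviour, with
made-up values:

	cpuLimit := resource.MustParse("1")
	svc := &BaseService{
		CPURequests:    resource.MustParse("500m"),
		CPULimit:       &cpuLimit, // explicit limit: used as-is
		MemoryRequests: resource.MustParse("512Mi"),
		MemoryLimit:    nil, // nil: no memory limit is set
	}
	reqs := svc.buildResourceReqs()
	// reqs.Requests: cpu=500m, memory=512Mi
	// reqs.Limits:   cpu=1 (no memory entry)
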
diff --git a/api/turing/cluster/servicebuilder/router.go b/api/turing/cluster/servicebuilder/router.go
index a97a651d4..3147de4ce 100644
--- a/api/turing/cluster/servicebuilder/router.go
+++ b/api/turing/cluster/servicebuilder/router.go
@@ -138,7 +138,9 @@ func (sb *clusterSvcBuilder) NewRouterService(
 			Namespace:            namespace,
 			Image:                routerVersion.Image,
 			CPURequests:          routerVersion.ResourceRequest.CPURequest,
+			CPULimit:             sb.getCPULimit(routerVersion.ResourceRequest),
 			MemoryRequests:       routerVersion.ResourceRequest.MemoryRequest,
+			MemoryLimit:          sb.getMemoryLimit(routerVersion.ResourceRequest),
 			LivenessHTTPGetPath:  routerLivenessPath,
 			ReadinessHTTPGetPath: routerReadinessPath,
 			Envs:                 envs,
@@ -148,18 +150,16 @@ func (sb *clusterSvcBuilder) NewRouterService(
 			VolumeMounts:   volumeMounts,
 			InitContainers: initContainers,
 		},
-		IsClusterLocal:                        false,
-		ContainerPort:                         routerPort,
-		Protocol:                              routerVersion.Protocol,
-		MinReplicas:                           routerVersion.ResourceRequest.MinReplica,
-		MaxReplicas:                           routerVersion.ResourceRequest.MaxReplica,
-		InitialScale:                          initialScale,
-		AutoscalingMetric:                     string(routerVersion.AutoscalingPolicy.Metric),
-		AutoscalingTarget:                     routerVersion.AutoscalingPolicy.Target,
-		TopologySpreadConstraints:             topologySpreadConstraints,
-		QueueProxyResourcePercentage:          sb.knativeServiceConfig.QueueProxyResourcePercentage,
-		UserContainerCPULimitRequestFactor:    sb.knativeServiceConfig.UserContainerCPULimitRequestFactor,
-		UserContainerMemoryLimitRequestFactor: sb.knativeServiceConfig.UserContainerMemoryLimitRequestFactor,
+		IsClusterLocal:               false,
+		ContainerPort:                routerPort,
+		Protocol:                     routerVersion.Protocol,
+		MinReplicas:                  routerVersion.ResourceRequest.MinReplica,
+		MaxReplicas:                  routerVersion.ResourceRequest.MaxReplica,
+		InitialScale:                 initialScale,
+		AutoscalingMetric:            string(routerVersion.AutoscalingPolicy.Metric),
+		AutoscalingTarget:            routerVersion.AutoscalingPolicy.Target,
+		TopologySpreadConstraints:    topologySpreadConstraints,
+		QueueProxyResourcePercentage: sb.knativeServiceConfig.QueueProxyResourcePercentage,
 	}
 	return sb.validateKnativeService(svc)
 }
@@ -210,17 +210,20 @@ func (sb *clusterSvcBuilder) buildRouterEnvs(
 	sentryDSN string,
 	ver *models.RouterVersion,
 ) ([]corev1.EnvVar, error) {
+	envs := sb.getEnvVars(ver.ResourceRequest, nil)
+
 	// Add app name, router timeout, jaeger collector
-	envs := []corev1.EnvVar{
-		{Name: envAppName, Value: fmt.Sprintf("%s-%d.%s", ver.Router.Name, ver.Version, namespace)},
-		{Name: envAppEnvironment, Value: environmentType},
-		{Name: envRouterTimeout, Value: ver.Timeout},
-		{Name: envJaegerEndpoint, Value: routerDefaults.JaegerCollectorEndpoint},
-		{Name: envRouterConfigFile, Value: routerConfigMapMountPath + routerConfigFileName},
-		{Name: envRouterProtocol, Value: string(ver.Protocol)},
-		{Name: envSentryEnabled, Value: strconv.FormatBool(sentryEnabled)},
-		{Name: envSentryDSN, Value: sentryDSN},
-	}
+	envs = append(envs,
+		[]corev1.EnvVar{
+			{Name: envAppName, Value: fmt.Sprintf("%s-%d.%s", ver.Router.Name, ver.Version, namespace)},
+			{Name: envAppEnvironment, Value: environmentType},
+			{Name: envRouterTimeout, Value: ver.Timeout},
+			{Name: envJaegerEndpoint, Value: routerDefaults.JaegerCollectorEndpoint},
+			{Name: envRouterConfigFile, Value: routerConfigMapMountPath + routerConfigFileName},
+			{Name: envRouterProtocol, Value: string(ver.Protocol)},
+			{Name: envSentryEnabled, Value: strconv.FormatBool(sentryEnabled)},
+			{Name: envSentryDSN, Value: sentryDSN},
+		}...)
 
 	// Add enricher / ensembler related env vars, if enabled
 	if ver.Enricher != nil {
diff --git a/api/turing/cluster/servicebuilder/router_test.go b/api/turing/cluster/servicebuilder/router_test.go
index 01de773fe..a4fd0d491 100644
--- a/api/turing/cluster/servicebuilder/router_test.go
+++ b/api/turing/cluster/servicebuilder/router_test.go
@@ -20,6 +20,10 @@ import (
 )
 
 func TestNewRouterService(t *testing.T) {
+	userContainerMemoryLimitRequestFactor := 1.5
+	abcDefaultEnvVar := corev1.EnvVar{Name: "ABC", Value: "true"}
+	defDefaultEnvVar := corev1.EnvVar{Name: "DEF", Value: "false"}
+
 	sb := NewClusterServiceBuilder(
 		resource.MustParse("2"),
 		resource.MustParse("2Gi"),
@@ -28,7 +32,8 @@ func TestNewRouterService(t *testing.T) {
 		&config.KnativeServiceDefaults{
 			QueueProxyResourcePercentage:          20,
 			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor,
+			DefaultEnvVarsWithoutCPULimits:        []corev1.EnvVar{abcDefaultEnvVar, defDefaultEnvVar},
 		},
 	)
 	testDataBasePath := filepath.Join("..", "..", "testdata", "cluster", "servicebuilder")
@@ -79,6 +84,9 @@ func TestNewRouterService(t *testing.T) {
 	cfgmapNoDefaultRoute, err := tu.ReadFile(filepath.Join(testDataBasePath, "router_configmap_no_default_route.yml"))
 	require.NoError(t, err)
 
+	memoryRequest := resource.MustParse("512Mi")
+	memoryLimit := cluster.ComputeResource(memoryRequest, userContainerMemoryLimitRequestFactor)
+
 	testInitialScale := 3
 
 	// Define tests
@@ -93,7 +101,8 @@ func TestNewRouterService(t *testing.T) {
 				Namespace:            "test-project",
 				Image:                "asia.gcr.io/gcp-project-id/turing-router:latest",
 				CPURequests:          resource.MustParse("400m"),
-				MemoryRequests:       resource.MustParse("512Mi"),
+				MemoryRequests:       memoryRequest,
+				MemoryLimit:          &memoryLimit,
 				LivenessHTTPGetPath:  "/v1/internal/live",
 				ReadinessHTTPGetPath: "/v1/internal/ready",
 				ConfigMap: &cluster.ConfigMap{
@@ -109,6 +118,8 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 				Envs: []corev1.EnvVar{
+					abcDefaultEnvVar,
+					defDefaultEnvVar,
 					{Name: "APP_NAME", Value: "test-svc-1.test-project"},
 					{Name: "APP_ENVIRONMENT", Value: "test-env"},
 					{Name: "ROUTER_TIMEOUT", Value: "5s"},
@@ -172,17 +183,15 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 			},
-			ContainerPort:                         8080,
-			Protocol:                              routerConfig.HTTP,
-			MinReplicas:                           2,
-			MaxReplicas:                           4,
-			InitialScale:                          &testInitialScale,
-			AutoscalingMetric:                     "concurrency",
-			AutoscalingTarget:                     "1",
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			ContainerPort:                8080,
+			Protocol:                     routerConfig.HTTP,
+			MinReplicas:                  2,
+			MaxReplicas:                  4,
+			InitialScale:                 &testInitialScale,
+			AutoscalingMetric:            "concurrency",
+			AutoscalingTarget:            "1",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"success | basic upi": {
@@ -195,7 +204,8 @@ func TestNewRouterService(t *testing.T) {
 				Namespace:            "test-project",
 				Image:                "asia.gcr.io/gcp-project-id/turing-router:latest",
 				CPURequests:          resource.MustParse("400m"),
resource.MustParse("512Mi"), + MemoryRequests: memoryRequest, + MemoryLimit: &memoryLimit, LivenessHTTPGetPath: "/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -211,6 +221,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "test-svc-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -274,17 +286,15 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.UPI, - MinReplicas: 2, - MaxReplicas: 4, - InitialScale: &testInitialScale, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerCPULimitRequestFactor: 0, - UserContainerMemoryLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.UPI, + MinReplicas: 2, + MaxReplicas: 4, + InitialScale: &testInitialScale, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, }, }, "success | all components": { @@ -296,7 +306,8 @@ func TestNewRouterService(t *testing.T) { Namespace: "test-project", Image: "asia.gcr.io/gcp-project-id/turing-router:latest", CPURequests: resource.MustParse("400m"), - MemoryRequests: resource.MustParse("512Mi"), + MemoryRequests: memoryRequest, + MemoryLimit: &memoryLimit, LivenessHTTPGetPath: "/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -312,6 +323,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "test-svc-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -383,16 +396,14 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerCPULimitRequestFactor: 0, - UserContainerMemoryLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, }, }, "success | standard ensembler with experiment mappings": { @@ -404,7 +415,8 @@ func TestNewRouterService(t *testing.T) { Namespace: "test-project", Image: "asia.gcr.io/gcp-project-id/turing-router:latest", CPURequests: resource.MustParse("400m"), - MemoryRequests: resource.MustParse("512Mi"), + MemoryRequests: memoryRequest, + MemoryLimit: &memoryLimit, LivenessHTTPGetPath: "/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -420,6 +432,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "test-svc-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -483,16 +497,14 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - 
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			ContainerPort:                8080,
+			Protocol:                     routerConfig.HTTP,
+			MinReplicas:                  2,
+			MaxReplicas:                  4,
+			AutoscalingMetric:            "rps",
+			AutoscalingTarget:            "100",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"success | standard ensembler with route name path": {
@@ -504,7 +516,8 @@ func TestNewRouterService(t *testing.T) {
 				Namespace:            "test-project",
 				Image:                "asia.gcr.io/gcp-project-id/turing-router:latest",
 				CPURequests:          resource.MustParse("400m"),
-				MemoryRequests:       resource.MustParse("512Mi"),
+				MemoryRequests:       memoryRequest,
+				MemoryLimit:          &memoryLimit,
 				LivenessHTTPGetPath:  "/v1/internal/live",
 				ReadinessHTTPGetPath: "/v1/internal/ready",
 				ConfigMap: &cluster.ConfigMap{
@@ -520,6 +533,8 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 				Envs: []corev1.EnvVar{
+					abcDefaultEnvVar,
+					defDefaultEnvVar,
 					{Name: "APP_NAME", Value: "test-svc-1.test-project"},
 					{Name: "APP_ENVIRONMENT", Value: "test-env"},
 					{Name: "ROUTER_TIMEOUT", Value: "5s"},
@@ -583,16 +598,14 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 			},
-			ContainerPort:                         8080,
-			Protocol:                              routerConfig.HTTP,
-			MinReplicas:                           2,
-			MaxReplicas:                           4,
-			AutoscalingMetric:                     "rps",
-			AutoscalingTarget:                     "100",
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			ContainerPort:                8080,
+			Protocol:                     routerConfig.HTTP,
+			MinReplicas:                  2,
+			MaxReplicas:                  4,
+			AutoscalingMetric:            "rps",
+			AutoscalingTarget:            "100",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"success | standard ensembler lazy routing": {
@@ -604,7 +617,8 @@ func TestNewRouterService(t *testing.T) {
 				Namespace:            "test-project",
 				Image:                "asia.gcr.io/gcp-project-id/turing-router:latest",
 				CPURequests:          resource.MustParse("400m"),
-				MemoryRequests:       resource.MustParse("512Mi"),
+				MemoryRequests:       memoryRequest,
+				MemoryLimit:          &memoryLimit,
 				LivenessHTTPGetPath:  "/v1/internal/live",
 				ReadinessHTTPGetPath: "/v1/internal/ready",
 				ConfigMap: &cluster.ConfigMap{
@@ -620,6 +634,8 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 				Envs: []corev1.EnvVar{
+					abcDefaultEnvVar,
+					defDefaultEnvVar,
 					{Name: "APP_NAME", Value: "test-svc-1.test-project"},
 					{Name: "APP_ENVIRONMENT", Value: "test-env"},
 					{Name: "ROUTER_TIMEOUT", Value: "5s"},
@@ -683,16 +699,14 @@ func TestNewRouterService(t *testing.T) {
 					},
 				},
 			},
-			ContainerPort:                         8080,
-			Protocol:                              routerConfig.HTTP,
-			MinReplicas:                           2,
-			MaxReplicas:                           4,
-			AutoscalingMetric:                     "rps",
-			AutoscalingTarget:                     "100",
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			ContainerPort:                8080,
+			Protocol:                     routerConfig.HTTP,
+			MinReplicas:                  2,
+			MaxReplicas:                  4,
+			AutoscalingMetric:            "rps",
+			AutoscalingTarget:            "100",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"success | traffic-splitting": {
@@ -704,7 +718,8 @@ func TestNewRouterService(t *testing.T) {
 				Namespace:            "test-project",
 				Image:                "asia.gcr.io/gcp-project-id/turing-router:latest",
 				CPURequests:          resource.MustParse("400m"),
-				MemoryRequests:       resource.MustParse("512Mi"),
+				MemoryRequests:       memoryRequest,
+				MemoryLimit:          &memoryLimit,
"/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -720,6 +735,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "test-svc-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -783,16 +800,14 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerCPULimitRequestFactor: 0, - UserContainerMemoryLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, }, }, "success | experiment engine": { @@ -804,7 +819,8 @@ func TestNewRouterService(t *testing.T) { Namespace: "test-project", Image: "ghcr.io/caraml-dev/turing/turing-router:latest", CPURequests: resource.MustParse("400m"), - MemoryRequests: resource.MustParse("512Mi"), + MemoryRequests: memoryRequest, + MemoryLimit: &memoryLimit, LivenessHTTPGetPath: "/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -820,6 +836,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "router-with-exp-engine-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -912,16 +930,14 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerCPULimitRequestFactor: 0, - UserContainerMemoryLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "rps", + AutoscalingTarget: "100", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, }, }, "success | no default route": { @@ -933,7 +949,8 @@ func TestNewRouterService(t *testing.T) { Namespace: "test-project", Image: "asia.gcr.io/gcp-project-id/turing-router:latest", CPURequests: resource.MustParse("400m"), - MemoryRequests: resource.MustParse("512Mi"), + MemoryRequests: memoryRequest, + MemoryLimit: &memoryLimit, LivenessHTTPGetPath: "/v1/internal/live", ReadinessHTTPGetPath: "/v1/internal/ready", ConfigMap: &cluster.ConfigMap{ @@ -949,6 +966,8 @@ func TestNewRouterService(t *testing.T) { }, }, Envs: []corev1.EnvVar{ + abcDefaultEnvVar, + defDefaultEnvVar, {Name: "APP_NAME", Value: "test-svc-1.test-project"}, {Name: "APP_ENVIRONMENT", Value: "test-env"}, {Name: "ROUTER_TIMEOUT", Value: "5s"}, @@ -989,16 +1008,14 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "memory", - AutoscalingTarget: "90", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerCPULimitRequestFactor: 0, - UserContainerMemoryLimitRequestFactor: 1.5, + ContainerPort: 8080, + 
+			Protocol:                     routerConfig.HTTP,
+			MinReplicas:                  2,
+			MaxReplicas:                  4,
+			AutoscalingMetric:            "memory",
+			AutoscalingTarget:            "90",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"failure missing bigquery": {
diff --git a/api/turing/cluster/servicebuilder/service_builder.go b/api/turing/cluster/servicebuilder/service_builder.go
index 51fc7907f..32e2f298f 100644
--- a/api/turing/cluster/servicebuilder/service_builder.go
+++ b/api/turing/cluster/servicebuilder/service_builder.go
@@ -206,23 +206,23 @@ func (sb *clusterSvcBuilder) NewEnricherService(
 			Namespace:      namespace,
 			Image:          enricher.Image,
 			CPURequests:    enricher.ResourceRequest.CPURequest,
+			CPULimit:       sb.getCPULimit(enricher.ResourceRequest),
 			MemoryRequests: enricher.ResourceRequest.MemoryRequest,
-			Envs:           enricher.Env.ToKubernetesEnvVars(),
+			MemoryLimit:    sb.getMemoryLimit(enricher.ResourceRequest),
+			Envs:           sb.getEnvVars(enricher.ResourceRequest, &enricher.Env),
 			Labels:         buildLabels(project, routerVersion.Router),
 			Volumes:        volumes,
 			VolumeMounts:   volumeMounts,
 		},
-		IsClusterLocal:                        true,
-		ContainerPort:                         int32(enricher.Port),
-		MinReplicas:                           enricher.ResourceRequest.MinReplica,
-		MaxReplicas:                           enricher.ResourceRequest.MaxReplica,
-		InitialScale:                          initialScale,
-		AutoscalingMetric:                     string(enricher.AutoscalingPolicy.Metric),
-		AutoscalingTarget:                     enricher.AutoscalingPolicy.Target,
-		TopologySpreadConstraints:             topologySpreadConstraints,
-		QueueProxyResourcePercentage:          sb.knativeServiceConfig.QueueProxyResourcePercentage,
-		UserContainerCPULimitRequestFactor:    sb.knativeServiceConfig.UserContainerCPULimitRequestFactor,
-		UserContainerMemoryLimitRequestFactor: sb.knativeServiceConfig.UserContainerMemoryLimitRequestFactor,
+		IsClusterLocal:               true,
+		ContainerPort:                int32(enricher.Port),
+		MinReplicas:                  enricher.ResourceRequest.MinReplica,
+		MaxReplicas:                  enricher.ResourceRequest.MaxReplica,
+		InitialScale:                 initialScale,
+		AutoscalingMetric:            string(enricher.AutoscalingPolicy.Metric),
+		AutoscalingTarget:            enricher.AutoscalingPolicy.Target,
+		TopologySpreadConstraints:    topologySpreadConstraints,
+		QueueProxyResourcePercentage: sb.knativeServiceConfig.QueueProxyResourcePercentage,
 	})
 }
@@ -292,23 +292,23 @@ func (sb *clusterSvcBuilder) NewEnsemblerService(
 			Namespace:      namespace,
 			Image:          docker.Image,
 			CPURequests:    docker.ResourceRequest.CPURequest,
+			CPULimit:       sb.getCPULimit(docker.ResourceRequest),
 			MemoryRequests: docker.ResourceRequest.MemoryRequest,
-			Envs:           docker.Env.ToKubernetesEnvVars(),
+			MemoryLimit:    sb.getMemoryLimit(docker.ResourceRequest),
+			Envs:           sb.getEnvVars(docker.ResourceRequest, &docker.Env),
 			Labels:         buildLabels(project, routerVersion.Router),
 			Volumes:        volumes,
 			VolumeMounts:   volumeMounts,
 		},
-		IsClusterLocal:                        true,
-		ContainerPort:                         int32(docker.Port),
-		MinReplicas:                           docker.ResourceRequest.MinReplica,
-		MaxReplicas:                           docker.ResourceRequest.MaxReplica,
-		InitialScale:                          initialScale,
-		AutoscalingMetric:                     string(docker.AutoscalingPolicy.Metric),
-		AutoscalingTarget:                     docker.AutoscalingPolicy.Target,
-		TopologySpreadConstraints:             topologySpreadConstraints,
-		QueueProxyResourcePercentage:          sb.knativeServiceConfig.QueueProxyResourcePercentage,
-		UserContainerCPULimitRequestFactor:    sb.knativeServiceConfig.UserContainerCPULimitRequestFactor,
-		UserContainerMemoryLimitRequestFactor: sb.knativeServiceConfig.UserContainerMemoryLimitRequestFactor,
+		IsClusterLocal:               true,
+		ContainerPort:                int32(docker.Port),
+		MinReplicas:                  docker.ResourceRequest.MinReplica,
+		MaxReplicas:                  docker.ResourceRequest.MaxReplica,
+		InitialScale:                 initialScale,
+		AutoscalingMetric:            string(docker.AutoscalingPolicy.Metric),
+		AutoscalingTarget:            docker.AutoscalingPolicy.Target,
+		TopologySpreadConstraints:    topologySpreadConstraints,
+		QueueProxyResourcePercentage: sb.knativeServiceConfig.QueueProxyResourcePercentage,
 	})
 }
@@ -404,6 +404,40 @@ func (sb *clusterSvcBuilder) getTopologySpreadConstraints() ([]corev1.TopologySp
 	return topologySpreadConstraints, nil
 }
 
+// getCPULimit returns the CPU limit for the user container. An explicitly
+// requested, non-zero CPU limit takes precedence; otherwise the limit is
+// derived from the CPU request via UserContainerCPULimitRequestFactor, if
+// that factor is set. A nil result means no CPU limit.
+func (sb *clusterSvcBuilder) getCPULimit(resourceRequest *models.ResourceRequest) *resource.Quantity {
+	if resourceRequest == nil {
+		return nil
+	}
+	if resourceRequest.CPULimit.IsZero() {
+		if sb.knativeServiceConfig.UserContainerCPULimitRequestFactor == 0 {
+			return nil
+		}
+		cpuLimit := cluster.ComputeResource(resourceRequest.CPURequest,
+			sb.knativeServiceConfig.UserContainerCPULimitRequestFactor)
+		return &cpuLimit
+	}
+	return &resourceRequest.CPULimit
+}
+
+// getMemoryLimit derives the memory limit from the memory request via
+// UserContainerMemoryLimitRequestFactor; a zero factor means no limit.
+func (sb *clusterSvcBuilder) getMemoryLimit(resourceRequest *models.ResourceRequest) *resource.Quantity {
+	if resourceRequest != nil && sb.knativeServiceConfig.UserContainerMemoryLimitRequestFactor != 0 {
+		memoryLimit := cluster.ComputeResource(resourceRequest.MemoryRequest,
+			sb.knativeServiceConfig.UserContainerMemoryLimitRequestFactor)
+		return &memoryLimit
+	}
+	return nil
+}
+
+// getEnvVars prepends DefaultEnvVarsWithoutCPULimits to the user-defined env
+// vars when the container will run without any CPU limit (i.e. no explicit
+// limit and a zero UserContainerCPULimitRequestFactor).
+func (sb *clusterSvcBuilder) getEnvVars(resourceRequest *models.ResourceRequest,
+	userEnvVars *models.EnvVars) (newEnvVars []corev1.EnvVar) {
+	if resourceRequest != nil && resourceRequest.CPULimit.IsZero() &&
+		sb.knativeServiceConfig.UserContainerCPULimitRequestFactor == 0 {
+		newEnvVars = append(newEnvVars, sb.knativeServiceConfig.DefaultEnvVarsWithoutCPULimits...)
+	}
+	if userEnvVars != nil {
+		newEnvVars = append(newEnvVars, userEnvVars.ToKubernetesEnvVars()...)
+	}
+	return
+}
+
 func GetComponentName(routerVersion *models.RouterVersion, componentType string) string {
 	return fmt.Sprintf("%s-turing-%s-%d", routerVersion.Router.Name, componentType, routerVersion.Version)
 }
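
[Illustration, not part of the patch] Resolution order implemented by the
helpers above, summarized:

	// getCPULimit:    explicit, non-zero resourceRequest.CPULimit -> used verbatim;
	//                 else CPURequest * UserContainerCPULimitRequestFactor (factor != 0);
	//                 else nil (no CPU limit).
	// getMemoryLimit: MemoryRequest * UserContainerMemoryLimitRequestFactor (factor != 0);
	//                 else nil (no memory limit).
	// getEnvVars:     DefaultEnvVarsWithoutCPULimits are prepended to the user env vars
	//                 only when the container ends up with no CPU limit at all.
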
diff --git a/api/turing/cluster/servicebuilder/service_builder_test.go b/api/turing/cluster/servicebuilder/service_builder_test.go
index e26df6a69..7f40cf3ce 100644
--- a/api/turing/cluster/servicebuilder/service_builder_test.go
+++ b/api/turing/cluster/servicebuilder/service_builder_test.go
@@ -43,6 +43,9 @@ type testSuiteNewService struct {
 }
 
 func TestNewEnricherService(t *testing.T) {
+	userContainerCPULimitRequestFactor := 2.0
+	userContainerMemoryLimitRequestFactor := 1.5
+
 	sb := NewClusterServiceBuilder(
 		resource.MustParse("2"),
 		resource.MustParse("2Gi"),
@@ -50,10 +53,17 @@ func TestNewEnricherService(t *testing.T) {
 		testTopologySpreadConstraints,
 		&config.KnativeServiceDefaults{
 			QueueProxyResourcePercentage:          10,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			UserContainerCPULimitRequestFactor:    userContainerCPULimitRequestFactor,
+			UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor,
 		},
 	)
+
+	cpuRequest := resource.MustParse("400m")
+	cpuLimit := cluster.ComputeResource(cpuRequest, userContainerCPULimitRequestFactor)
+
+	memoryRequest := resource.MustParse("256Mi")
+	memoryLimit := cluster.ComputeResource(memoryRequest, userContainerMemoryLimitRequestFactor)
+
 	testDataBasePath := filepath.Join("..", "..", "testdata", "cluster", "servicebuilder")
 
 	testInitialScale := 5
@@ -66,8 +76,10 @@ func TestNewEnricherService(t *testing.T) {
 				Name:      "test-svc-turing-enricher-1",
 				Namespace: "test-project",
 				Image:     "asia.gcr.io/gcp-project-id/echo:1.0.2",
-				CPURequests:    resource.MustParse("400m"),
-				MemoryRequests: resource.MustParse("256Mi"),
+				CPURequests:    cpuRequest,
+				CPULimit:       &cpuLimit,
+				MemoryRequests: memoryRequest,
+				MemoryLimit:    &memoryLimit,
 				Envs: []corev1.EnvVar{
 					{Name: "TEST_ENV", Value: "enricher"},
 					{Name: "GOOGLE_APPLICATION_CREDENTIALS", Value: "/var/secret/enricher-service-account.json"},
@@ -91,17 +103,15 @@ func TestNewEnricherService(t *testing.T) {
 				},
 				VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}},
 			},
-			IsClusterLocal:                        true,
-			ContainerPort:                         8080,
-			MinReplicas:                           1,
-			MaxReplicas:                           2,
-			InitialScale:                          &testInitialScale,
-			AutoscalingMetric:                     "concurrency",
-			AutoscalingTarget:                     "1",
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          10,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			IsClusterLocal:               true,
+			ContainerPort:                8080,
+			MinReplicas:                  1,
+			MaxReplicas:                  2,
+			InitialScale:                 &testInitialScale,
+			AutoscalingMetric:            "concurrency",
+			AutoscalingTarget:            "1",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 10,
 		},
 	},
 	"failure": {
@@ -134,6 +144,9 @@ func TestNewEnricherService(t *testing.T) {
 }
 
 func TestNewEnsemblerService(t *testing.T) {
+	userContainerCPULimitRequestFactor := 2.0
+	userContainerMemoryLimitRequestFactor := 1.5
+
 	sb := NewClusterServiceBuilder(
 		resource.MustParse("2"),
 		resource.MustParse("2Gi"),
@@ -141,10 +154,17 @@ func TestNewEnsemblerService(t *testing.T) {
 		testTopologySpreadConstraints,
 		&config.KnativeServiceDefaults{
 			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			UserContainerCPULimitRequestFactor:    userContainerCPULimitRequestFactor,
+			UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor,
 		},
 	)
+
+	cpuRequest := resource.MustParse("200m")
+	cpuLimit := cluster.ComputeResource(cpuRequest, userContainerCPULimitRequestFactor)
+
+	memoryRequest := resource.MustParse("1024Mi")
+	memoryLimit := cluster.ComputeResource(memoryRequest, userContainerMemoryLimitRequestFactor)
+
 	testDataBasePath := filepath.Join("..", "..", "testdata", "cluster", "servicebuilder")
 
 	testInitialScale := 5
@@ -157,8 +177,10 @@ func TestNewEnsemblerService(t *testing.T) {
 				Name:      "test-svc-turing-ensembler-1",
 				Namespace: "test-project",
 				Image:     "asia.gcr.io/gcp-project-id/echo:1.0.2",
-				CPURequests:    resource.MustParse("200m"),
-				MemoryRequests: resource.MustParse("1024Mi"),
+				CPURequests:    cpuRequest,
+				CPULimit:       &cpuLimit,
+				MemoryRequests: memoryRequest,
+				MemoryLimit:    &memoryLimit,
 				Envs: []corev1.EnvVar{
 					{Name: "TEST_ENV", Value: "ensembler"},
 					{Name: "GOOGLE_APPLICATION_CREDENTIALS", Value: "/var/secret/ensembler-service-account.json"},
@@ -181,17 +203,15 @@ func TestNewEnsemblerService(t *testing.T) {
 				},
 				VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}},
 			},
-			IsClusterLocal:                        true,
-			ContainerPort:                         8080,
-			MinReplicas:                           2,
-			MaxReplicas:                           3,
-			AutoscalingMetric:                     "concurrency",
-			AutoscalingTarget:                     "1",
-			InitialScale:                          &testInitialScale,
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			IsClusterLocal:               true,
+			ContainerPort:                8080,
+			MinReplicas:                  2,
+			MaxReplicas:                  3,
+			AutoscalingMetric:            "concurrency",
+			AutoscalingTarget:            "1",
+			InitialScale:                 &testInitialScale,
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"success with ensembler docker type": {
@@ -201,8 +221,10 @@ func TestNewEnsemblerService(t *testing.T) {
 				Name:      "test-svc-turing-ensembler-1",
 				Namespace: "test-project",
 				Image:     "asia.gcr.io/gcp-project-id/echo:1.0.2",
-				CPURequests:    resource.MustParse("200m"),
-				MemoryRequests: resource.MustParse("1024Mi"),
+				CPURequests:    cpuRequest,
+				CPULimit:       &cpuLimit,
+				MemoryRequests: memoryRequest,
+				MemoryLimit:    &memoryLimit,
 				Envs: []corev1.EnvVar{
 					{Name: "TEST_ENV", Value: "ensembler"},
 					{Name: "GOOGLE_APPLICATION_CREDENTIALS", Value: "/var/secret/ensembler-service-account.json"},
@@ -225,16 +247,14 @@ func TestNewEnsemblerService(t *testing.T) {
 				},
 				VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}},
 			},
-			IsClusterLocal:                        true,
-			ContainerPort:                         8080,
-			MinReplicas:                           2,
-			MaxReplicas:                           3,
-			AutoscalingMetric:                     "cpu",
-			AutoscalingTarget:                     "90",
-			TopologySpreadConstraints:             testTopologySpreadConstraints,
-			QueueProxyResourcePercentage:          20,
-			UserContainerCPULimitRequestFactor:    0,
-			UserContainerMemoryLimitRequestFactor: 1.5,
+			IsClusterLocal:               true,
+			ContainerPort:                8080,
+			MinReplicas:                  2,
+			MaxReplicas:                  3,
+			AutoscalingMetric:            "cpu",
+			AutoscalingTarget:            "90",
+			TopologySpreadConstraints:    testTopologySpreadConstraints,
+			QueueProxyResourcePercentage: 20,
 		},
 	},
 	"failure": {
diff --git a/api/turing/config/config.go b/api/turing/config/config.go
index 2eb40bf7f..cc3ef5fbc 100644
--- a/api/turing/config/config.go
+++ b/api/turing/config/config.go
@@ -263,8 +263,9 @@ type KubernetesLabelConfigs struct {
 
 // Knative services
 type KnativeServiceDefaults struct {
 	QueueProxyResourcePercentage int
-	UserContainerCPULimitRequestFactor    float64 `json:"userContainerLimitCPURequestFactor"`
-	UserContainerMemoryLimitRequestFactor float64 `json:"userContainerLimitMemoryRequestFactor"`
+	UserContainerCPULimitRequestFactor    float64
+	UserContainerMemoryLimitRequestFactor float64
+	DefaultEnvVarsWithoutCPULimits        []corev1.EnvVar
 }
 
 // SinglePageApplicationConfig holds configuration required for serving SPAs
diff --git a/api/turing/config/config_test.go b/api/turing/config/config_test.go
index c07180fde..a08ea9104 100644
--- a/api/turing/config/config_test.go
+++ b/api/turing/config/config_test.go
@@ -283,6 +283,12 @@ func TestLoad(t *testing.T) {
 				QueueProxyResourcePercentage:          20,
 				UserContainerCPULimitRequestFactor:    0,
 				UserContainerMemoryLimitRequestFactor: 1.25,
+				DefaultEnvVarsWithoutCPULimits: []corev1.EnvVar{
+					{
+						Name:  "foo",
+						Value: "bar",
+					},
+				},
 			},
 			RouterDefaults: &config.RouterDefaults{
 				LogLevel: "INFO",
@@ -425,6 +431,12 @@ func TestLoad(t *testing.T) {
 				QueueProxyResourcePercentage:          20,
 				UserContainerCPULimitRequestFactor:    0,
 				UserContainerMemoryLimitRequestFactor: 1.25,
+				DefaultEnvVarsWithoutCPULimits: []corev1.EnvVar{
+					{
+						Name:  "foo",
+						Value: "bar",
+					},
+				},
 			},
 			RouterDefaults: &config.RouterDefaults{
 				LogLevel: "INFO",
@@ -554,6 +566,12 @@ func TestLoad(t *testing.T) {
 				QueueProxyResourcePercentage:          20,
 				UserContainerCPULimitRequestFactor:    0,
 				UserContainerMemoryLimitRequestFactor: 1.25,
+				DefaultEnvVarsWithoutCPULimits: []corev1.EnvVar{
+					{
+						Name:  "foo",
+						Value: "bar",
+					},
+				},
 			},
 			DeployConfig: &config.DeploymentConfig{
 				EnvironmentType: "dev",
diff --git a/api/turing/config/testdata/config-1.yaml b/api/turing/config/testdata/config-1.yaml
index ff2a7aeac..968217574 100644
--- a/api/turing/config/testdata/config-1.yaml
+++ b/api/turing/config/testdata/config-1.yaml
@@ -48,6 +48,9 @@ KnativeServiceDefaults:
   QueueProxyResourcePercentage: 20
   UserContainerCPULimitRequestFactor: 0
   UserContainerMemoryLimitRequestFactor: 1.25
+  DefaultEnvVarsWithoutCPULimits:
+    - Name: foo
+      Value: bar
 RouterDefaults:
   FluentdConfig:
     FlushIntervalSeconds: 60
diff --git a/api/turing/models/resource_request.go b/api/turing/models/resource_request.go
index 0d3e3f044..cba2fbc62 100644
--- a/api/turing/models/resource_request.go
+++ b/api/turing/models/resource_request.go
@@ -16,6 +16,8 @@ type ResourceRequest struct {
 	// CPU request of inference service
 	CPURequest resource.Quantity `json:"cpu_request"`
+	// CPU limit of inference service; if zero, the limit (if any) is derived
+	// from the platform-level UserContainerCPULimitRequestFactor
+	CPULimit resource.Quantity `json:"cpu_limit"`
 	// Memory request of inference service
 	MemoryRequest resource.Quantity `json:"memory_request"`
 }
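
[Illustration, not part of the patch] A caller can now pin an explicit CPU
limit on a ResourceRequest; the field and tag names below come from the
struct above, the values are made up:

	rr := &models.ResourceRequest{
		CPURequest:    resource.MustParse("400m"),  // json: cpu_request
		CPULimit:      resource.MustParse("1"),     // json: cpu_limit; non-zero, so used verbatim
		MemoryRequest: resource.MustParse("512Mi"), // json: memory_request
	}
	_ = rr
	// With CPULimit left zero, the limit falls back to
	// CPURequest * UserContainerCPULimitRequestFactor, or to no CPU limit
	// (plus DefaultEnvVarsWithoutCPULimits) when that factor is 0.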