/
remoteshuffleservice_types.go
371 lines (300 loc) · 14.6 KB
/
remoteshuffleservice_types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package v1alpha1
import (
autoscalingv2 "k8s.io/api/autoscaling/v2beta2"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
type ShuffleServerUpgradeStrategyType string
const (
PartitionUpgrade ShuffleServerUpgradeStrategyType = "PartitionUpgrade"
SpecificUpgrade ShuffleServerUpgradeStrategyType = "SpecificUpgrade"
FullUpgrade ShuffleServerUpgradeStrategyType = "FullUpgrade"
FullRestart ShuffleServerUpgradeStrategyType = "FullRestart"
)
type RSSPhase string
const (
// RSSPending represents RSS object is pending.
RSSPending RSSPhase = "Pending"
// RSSRunning represents RSS object is running normally.
RSSRunning RSSPhase = "Running"
// RSSTerminating represents RSS object is terminating.
RSSTerminating RSSPhase = "Terminating"
// RSSFailed represents RSS object has been failed.
RSSFailed RSSPhase = "Failed"
// RSSUpgrading represents RSS object is upgrading.
RSSUpgrading RSSPhase = "Upgrading"
)
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// RemoteShuffleServiceSpec defines the desired state of RemoteShuffleService.
type RemoteShuffleServiceSpec struct {
// Coordinator contains configurations of the coordinators.
Coordinator *CoordinatorConfig `json:"coordinator"`
// ShuffleServer contains configuration of the shuffle servers.
ShuffleServer *ShuffleServerConfig `json:"shuffleServer"`
// ConfigMapName indicates configMap name stores configurations of coordinators and shuffle servers.
ConfigMapName string `json:"configMapName"`
// +optional
ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"`
}
// CoordinatorConfig records configuration used to generate workload of coordination.
type CoordinatorConfig struct {
*CommonConfig `json:",inline"`
// +kubebuilder:default:=true
// Sync indicates whether we need to sync configurations to the running coordinators.
// +optional
Sync *bool `json:"sync,omitempty"`
// +kubebuilder:default:=2
// Count is the number of coordinator workloads to be generated.
// +optional
Count *int32 `json:"count,omitempty"`
// +kubebuilder:default:=1
// Replicas is the initial replicas of coordinators.
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// +kubebuilder:default:=19997
// RPCPort defines rpc port used by coordinators.
// +optional
RPCPort *int32 `json:"rpcPort,omitempty"`
// +kubebuilder:default:=19996
// HTTPPort defines http port used by coordinators.
// +optional
HTTPPort *int32 `json:"httpPort,omitempty"`
// +kubebuilder:default:=/config/exclude_nodes
// ExcludeNodesFilePath indicates exclude nodes file path in coordinators' containers.
// +optional
ExcludeNodesFilePath string `json:"excludeNodesFilePath,omitempty"`
// RPCNodePort defines rpc port of node port service used for coordinators' external access.
// +optional
RPCNodePort []int32 `json:"rpcNodePort,omitempty"`
// HTTPNodePort defines http port of node port service used for coordinators' external access.
// +optional
HTTPNodePort []int32 `json:"httpNodePort,omitempty"`
}
// ShuffleServerConfig records configuration used to generate workload of shuffle servers.
type ShuffleServerConfig struct {
*CommonConfig `json:",inline"`
// +kubebuilder:default:=false
// Sync indicates whether we need to sync configurations to the running shuffle servers.
// +optional
Sync *bool `json:"sync,omitempty"`
// +kubebuilder:default:=1
// Replicas is the initial replicas of shuffle servers.
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// +kubebuilder:default:=19997
// RPCPort defines rpc port used by shuffle servers.
// +optional
RPCPort *int32 `json:"rpcPort,omitempty"`
// +kubebuilder:default:=19996
// HTTPPort defines http port used by shuffle servers.
// +optional
HTTPPort *int32 `json:"httpPort,omitempty"`
// RPCNodePort defines rpc port of node port service used for shuffle servers' external access.
// +optional
RPCNodePort *int32 `json:"rpcNodePort,omitempty"`
// HTTPNodePort defines http port of node port service used for shuffle servers' external access.
// +optional
HTTPNodePort *int32 `json:"httpNodePort,omitempty"`
// UpgradeStrategy defines upgrade strategy of shuffle servers.
UpgradeStrategy *ShuffleServerUpgradeStrategy `json:"upgradeStrategy"`
}
// ShuffleServerUpgradeStrategy defines upgrade strategy of shuffle servers.
type ShuffleServerUpgradeStrategy struct {
// Type represents upgrade type of shuffle servers, including partition, specific copy and full upgrade.
Type ShuffleServerUpgradeStrategyType `json:"type"`
// Partition means the minimum number that needs to be upgraded, the copies whose numbers are greater than or
// equal to this number needs to be upgraded.
// +optional
Partition *int32 `json:"partition,omitempty"`
// SpecificNames indicates the specific pod names of shuffle servers which we want to upgrade.
// +optional
SpecificNames []string `json:"specificNames,omitempty"`
}
// CommonConfig defines the common fields of coordinators and shuffle servers.
type CommonConfig struct {
*RSSPodSpec `json:",inline"`
// XmxSize defines xmx size of coordinators or shuffle servers.
XmxSize string `json:"xmxSize"`
// ConfigDir records the directory where the configuration of coordinators or shuffle servers resides.
ConfigDir string `json:"configDir"`
// Parameters holds the optional parameters used by coordinators or shuffle servers.
// +optional
Parameters map[string]string `json:"parameters,omitempty"`
// Autoscaler defines desired functionality of HPA object to be generated.
// +optional
Autoscaler RSSAutoscaler `json:"autoscaler,omitempty"`
}
// RSSAutoscaler describes the desired functionality of the HPA object to be generated,
// which automatically manages the replica count of any resource implementing the scale
// subresource based on the metrics specified.
type RSSAutoscaler struct {
// Enable indicates whether we need to generate an HPA object.
Enable bool `json:"enable"`
// HPASpec allows users to configure HPA objects to achieve automatic scaling.
// This field is very similar to autoscalingv2.HorizontalPodAutoscalerSpec, but
// in autoscalingv2.HorizontalPodAutoscalerSpec, ScaleTargetRef is a required
// field, while the rss object does not require users to specify this field.
// Therefore, we have redefined a HorizontalPodAutoscalerSpec, removing the
// ScaleTargetRef field in autoscalingv2.HorizontalPodAutoscalerSpec.
// +optional
HPASpec HorizontalPodAutoscalerSpec `json:"hpaSpec,omitempty"`
}
// HorizontalPodAutoscalerSpec is very similar to autoscalingv2.HorizontalPodAutoscalerSpec,
// but in autoscalingv2.HorizontalPodAutoscalerSpec, ScaleTargetRef is a required field,
// while the rss object does not require users to specify this field. Therefore, we have
// redefined a HorizontalPodAutoscalerSpec, removing the ScaleTargetRef field in
// autoscalingv2.HorizontalPodAutoscalerSpec.
type HorizontalPodAutoscalerSpec struct {
// +kubebuilder:default:=1
// minReplicas is the lower limit for the number of replicas to which the autoscaler
// can scale down. It defaults to 1 pod. minReplicas is allowed to be 0 if the
// alpha feature gate HPAScaleToZero is enabled and at least one Object or External
// metric is configured. Scaling is active as long as at least one metric value is
// available.
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`
// maxReplicas is the upper limit for the number of replicas to which the autoscaler can scale up.
// It cannot be less that minReplicas.
MaxReplicas int32 `json:"maxReplicas"`
// metrics contains the specifications for which to use to calculate the
// desired replica count (the maximum replica count across all metrics will
// be used). The desired replica count is calculated multiplying the
// ratio between the target value and the current value by the current
// number of pods. Ergo, metrics used must decrease as the pod count is
// increased, and vice-versa. See the individual metric source types for
// more information about how each type of metric must respond.
// If not set, the default metric will be set to 80% average CPU utilization.
// +optional
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
// behavior configures the scaling behavior of the target
// in both Up and Down directions (scaleUp and scaleDown fields respectively).
// If not set, the default HPAScalingRules for scale up and scale down are used.
// +optional
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
}
// RSSPodSpec defines the desired state of coordinators or shuffle servers' pods.
type RSSPodSpec struct {
*MainContainer `json:",inline"`
// Volumes stores volumes' information used by coordinators or shuffle servers.
// +optional
Volumes []corev1.Volume `json:"volumes,omitempty"`
// SidecarContainers represents sidecar containers for monitoring, logging, etc.
// +optional
SidecarContainers []corev1.Container `json:"sidecarContainers,omitempty"`
// SecurityContext holds pod-level security attributes and common container settings.
// +optional
SecurityContext *corev1.PodSecurityContext `json:"securityContext,omitempty"`
// InitContainerImage represents image of init container used to change owner of host paths.
// +optional
InitContainerImage string `json:"initContainerImage,omitempty"`
// +kubebuilder:default:=true
// HostNetwork indicates whether we need to enable host network.
// +optional
HostNetwork *bool `json:"hostNetwork,omitempty"`
// HostPathMounts indicates host path volumes and their mounting path within shuffle servers' containers.
// +optional
HostPathMounts map[string]string `json:"hostPathMounts,omitempty"`
// LogHostPath represents host path used to save logs of shuffle servers.
// +optional
LogHostPath string `json:"logHostPath,omitempty"`
// Labels represents labels to be added in coordinators or shuffle servers' pods.
// +optional
Labels map[string]string `json:"labels,omitempty"`
// RuntimeClassName refers to a RuntimeClass object in the node.k8s.io group, which should be used
// to run this pod. If no RuntimeClass resource matches the named class, the pod will not be run.
// If unset or empty, the "legacy" RuntimeClass will be used, which is an implicit class with an
// empty definition that uses the default runtime handler.
// +optional
RuntimeClassName *string `json:"runtimeClassName,omitempty"`
// NodeSelector is a selector which must be true for the pod to fit on a node.
// Selector which must match a node's labels for the pod to be scheduled on that node.
// +optional
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
// Tolerations indicates the tolerations the pods under this subset have.
// +optional
Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
// Affinity is a group of affinity scheduling rules.
// +optional
Affinity *corev1.Affinity `json:"affinity,omitempty"`
}
// MainContainer stores information of the main container of coordinators or shuffle servers,
// its information will be used to generate workload of coordinators or shuffle servers.
type MainContainer struct {
// Image represents image of coordinators or shuffle servers.
Image string `json:"image"`
// Command represents command used by coordinators or shuffle servers.
// +optional
Command []string `json:"command,omitempty"`
// Args represents args used by coordinators or shuffle servers.
// +optional
Args []string `json:"args,omitempty"`
// Ports represents ports used by coordinators or shuffle servers.
// +optional
Ports []corev1.ContainerPort `json:"ports,omitempty"`
// Resources represents resources used by coordinators or shuffle servers.
// +optional
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
// Env represents env set for coordinators or shuffle servers.
// +optional
Env []corev1.EnvVar `json:"env,omitempty"`
// VolumeMounts indicates describes mountings of volumes within shuffle servers' container.
// +optional
VolumeMounts []corev1.VolumeMount `json:"volumeMounts,omitempty"`
}
// RemoteShuffleServiceStatus defines the observed state of RemoteShuffleService
type RemoteShuffleServiceStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
// Phase defines phase of the RemoteShuffleService.
Phase RSSPhase `json:"phase"`
// TargetKeys records the target names of shuffle servers to be excluded when the RSS object is
// upgrading or terminating.
// +optional
TargetKeys []string `json:"targetKeys,omitempty"`
// DeletedKeys records the names of deleted shuffle servers.
// +optional
DeletedKeys []string `json:"deletedKeys,omitempty"`
// Reason is the reason why the RSS object is failed.
// +optional
Reason string `json:"reason,omitempty"`
}
//+genclient
//+kubebuilder:resource:shortName=rss
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="UpgradeStrategyType",type="string",JSONPath=".spec.shuffleServer.upgradeStrategy.type",description="upgradeStrategy type of shuffleServer"
//+kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase",description="rss phase"
// RemoteShuffleService is the Schema for the remoteshuffleservices API
type RemoteShuffleService struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Spec RemoteShuffleServiceSpec `json:"spec,omitempty"`
Status RemoteShuffleServiceStatus `json:"status,omitempty"`
}
//+kubebuilder:object:root=true
// RemoteShuffleServiceList contains a list of RemoteShuffleService
type RemoteShuffleServiceList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []RemoteShuffleService `json:"items"`
}
func init() {
SchemeBuilder.Register(&RemoteShuffleService{}, &RemoteShuffleServiceList{})
}