-
Notifications
You must be signed in to change notification settings - Fork 5.1k
/
sharding.go
353 lines (316 loc) · 13.3 KB
/
sharding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
package sharding
import (
"context"
"fmt"
"hash/fnv"
"os"
"sort"
"strconv"
"strings"
"time"
"encoding/json"
"github.com/argoproj/argo-cd/v2/common"
"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"github.com/argoproj/argo-cd/v2/util/db"
"github.com/argoproj/argo-cd/v2/util/env"
"github.com/argoproj/argo-cd/v2/util/settings"
log "github.com/sirupsen/logrus"
kubeerrors "k8s.io/apimachinery/pkg/api/errors"
)
// Make it overridable for testing
var osHostnameFunction = os.Hostname
// Make it overridable for testing
var heartbeatCurrentTime = metav1.Now
var (
HeartbeatDuration = env.ParseNumFromEnv(common.EnvControllerHeartbeatTime, 10, 10, 60)
HeartbeatTimeout = 3 * HeartbeatDuration
)
const ShardControllerMappingKey = "shardControllerMapping"
type DistributionFunction func(c *v1alpha1.Cluster) int
type ClusterFilterFunction func(c *v1alpha1.Cluster) bool
// shardApplicationControllerMapping stores the mapping of Shard Number to Application Controller in ConfigMap.
// It also stores the heartbeat of last synced time of the application controller.
type shardApplicationControllerMapping struct {
ShardNumber int
ControllerName string
HeartbeatTime metav1.Time
}
// GetClusterFilter returns a ClusterFilterFunction which is a function taking a cluster as a parameter
// and returns wheter or not the cluster should be processed by a given shard. It calls the distributionFunction
// to determine which shard will process the cluster, and if the given shard is equal to the calculated shard
// the function will return true.
func GetClusterFilter(db db.ArgoDB, distributionFunction DistributionFunction, shard int) ClusterFilterFunction {
replicas := db.GetApplicationControllerReplicas()
return func(c *v1alpha1.Cluster) bool {
clusterShard := 0
if c != nil && c.Shard != nil {
requestedShard := int(*c.Shard)
if requestedShard < replicas {
clusterShard = requestedShard
} else {
log.Warnf("Specified cluster shard (%d) for cluster: %s is greater than the number of available shard. Assigning automatically.", requestedShard, c.Name)
}
} else {
clusterShard = distributionFunction(c)
}
return clusterShard == shard
}
}
// GetDistributionFunction returns which DistributionFunction should be used based on the passed algorithm and
// the current datas.
func GetDistributionFunction(db db.ArgoDB, shardingAlgorithm string) DistributionFunction {
log.Infof("Using filter function: %s", shardingAlgorithm)
distributionFunction := LegacyDistributionFunction(db)
switch shardingAlgorithm {
case common.RoundRobinShardingAlgorithm:
distributionFunction = RoundRobinDistributionFunction(db)
case common.LegacyShardingAlgorithm:
distributionFunction = LegacyDistributionFunction(db)
default:
log.Warnf("distribution type %s is not supported, defaulting to %s", shardingAlgorithm, common.DefaultShardingAlgorithm)
}
return distributionFunction
}
// LegacyDistributionFunction returns a DistributionFunction using a stable distribution algorithm:
// for a given cluster the function will return the shard number based on the cluster id. This function
// is lightweight and can be distributed easily, however, it does not ensure an homogenous distribution as
// some shards may get assigned more clusters than others. It is the legacy function distribution that is
// kept for compatibility reasons
func LegacyDistributionFunction(db db.ArgoDB) DistributionFunction {
replicas := db.GetApplicationControllerReplicas()
return func(c *v1alpha1.Cluster) int {
if replicas == 0 {
return -1
}
if c == nil {
return 0
}
id := c.ID
log.Debugf("Calculating cluster shard for cluster id: %s", id)
if id == "" {
return 0
} else {
h := fnv.New32a()
_, _ = h.Write([]byte(id))
shard := int32(h.Sum32() % uint32(replicas))
log.Debugf("Cluster with id=%s will be processed by shard %d", id, shard)
return int(shard)
}
}
}
// RoundRobinDistributionFunction returns a DistributionFunction using an homogeneous distribution algorithm:
// for a given cluster the function will return the shard number based on the modulo of the cluster rank in
// the cluster's list sorted by uid on the shard number.
// This function ensures an homogenous distribution: each shards got assigned the same number of
// clusters +/-1 , but with the drawback of a reshuffling of clusters accross shards in case of some changes
// in the cluster list
func RoundRobinDistributionFunction(db db.ArgoDB) DistributionFunction {
replicas := db.GetApplicationControllerReplicas()
return func(c *v1alpha1.Cluster) int {
if replicas > 0 {
if c == nil { // in-cluster does not necessarly have a secret assigned. So we are receiving a nil cluster here.
return 0
} else {
clusterIndexdByClusterIdMap := createClusterIndexByClusterIdMap(db)
clusterIndex, ok := clusterIndexdByClusterIdMap[c.ID]
if !ok {
log.Warnf("Cluster with id=%s not found in cluster map.", c.ID)
return -1
}
shard := int(clusterIndex % replicas)
log.Debugf("Cluster with id=%s will be processed by shard %d", c.ID, shard)
return shard
}
}
log.Warnf("The number of replicas (%d) is lower than 1", replicas)
return -1
}
}
// InferShard extracts the shard index based on its hostname.
func InferShard() (int, error) {
hostname, err := osHostnameFunction()
if err != nil {
return -1, err
}
parts := strings.Split(hostname, "-")
if len(parts) == 0 {
return 0, fmt.Errorf("hostname should ends with shard number separated by '-' but got: %s", hostname)
}
shard, err := strconv.Atoi(parts[len(parts)-1])
if err != nil {
return 0, fmt.Errorf("hostname should ends with shard number separated by '-' but got: %s", hostname)
}
return int(shard), nil
}
func getSortedClustersList(db db.ArgoDB) []v1alpha1.Cluster {
ctx := context.Background()
clustersList, dbErr := db.ListClusters(ctx)
if dbErr != nil {
log.Warnf("Error while querying clusters list from database: %v", dbErr)
return []v1alpha1.Cluster{}
}
clusters := clustersList.Items
sort.Slice(clusters, func(i, j int) bool {
return clusters[i].ID < clusters[j].ID
})
return clusters
}
func createClusterIndexByClusterIdMap(db db.ArgoDB) map[string]int {
clusters := getSortedClustersList(db)
log.Debugf("ClustersList has %d items", len(clusters))
clusterById := make(map[string]v1alpha1.Cluster)
clusterIndexedByClusterId := make(map[string]int)
for i, cluster := range clusters {
log.Debugf("Adding cluster with id=%s and name=%s to cluster's map", cluster.ID, cluster.Name)
clusterById[cluster.ID] = cluster
clusterIndexedByClusterId[cluster.ID] = i
}
return clusterIndexedByClusterId
}
// GetOrUpdateShardFromConfigMap finds the shard number from the shard mapping configmap. If the shard mapping configmap does not exist,
// the function creates the shard mapping configmap.
// The function takes the shard number from the environment variable (default value -1, if not set) and passes it to this function.
// If the shard value passed to this function is -1, that is, the shard was not set as an environment variable,
// we default the shard number to 0 for computing the default config map.
func GetOrUpdateShardFromConfigMap(kubeClient *kubernetes.Clientset, settingsMgr *settings.SettingsManager, replicas, shard int) (int, error) {
hostname, err := osHostnameFunction()
if err != nil {
return -1, err
}
// fetch the shard mapping configMap
shardMappingCM, err := kubeClient.CoreV1().ConfigMaps(settingsMgr.GetNamespace()).Get(context.Background(), common.ArgoCDAppControllerShardConfigMapName, metav1.GetOptions{})
if err != nil {
if !kubeerrors.IsNotFound(err) {
return -1, fmt.Errorf("error getting sharding config map: %s", err)
}
log.Infof("shard mapping configmap %s not found. Creating default shard mapping configmap.", common.ArgoCDAppControllerShardConfigMapName)
// if the shard is not set as an environment variable, set the default value of shard to 0 for generating default CM
if shard == -1 {
shard = 0
}
shardMappingCM, err = generateDefaultShardMappingCM(settingsMgr.GetNamespace(), hostname, replicas, shard)
if err != nil {
return -1, fmt.Errorf("error generating default shard mapping configmap %s", err)
}
if _, err = kubeClient.CoreV1().ConfigMaps(settingsMgr.GetNamespace()).Create(context.Background(), shardMappingCM, metav1.CreateOptions{}); err != nil {
return -1, fmt.Errorf("error creating shard mapping configmap %s", err)
}
// return 0 as the controller is assigned to shard 0 while generating default shard mapping ConfigMap
return shard, nil
} else {
// Identify the available shard and update the ConfigMap
data := shardMappingCM.Data[ShardControllerMappingKey]
var shardMappingData []shardApplicationControllerMapping
err := json.Unmarshal([]byte(data), &shardMappingData)
if err != nil {
return -1, fmt.Errorf("error unmarshalling shard config map data: %s", err)
}
shard, shardMappingData := getOrUpdateShardNumberForController(shardMappingData, hostname, replicas, shard)
updatedShardMappingData, err := json.Marshal(shardMappingData)
if err != nil {
return -1, fmt.Errorf("error marshalling data of shard mapping ConfigMap: %s", err)
}
shardMappingCM.Data[ShardControllerMappingKey] = string(updatedShardMappingData)
_, err = kubeClient.CoreV1().ConfigMaps(settingsMgr.GetNamespace()).Update(context.Background(), shardMappingCM, metav1.UpdateOptions{})
if err != nil {
return -1, err
}
return shard, nil
}
}
// getOrUpdateShardNumberForController takes list of shardApplicationControllerMapping and performs computation to find the matching or empty shard number
func getOrUpdateShardNumberForController(shardMappingData []shardApplicationControllerMapping, hostname string, replicas, shard int) (int, []shardApplicationControllerMapping) {
// if current length of shardMappingData in shard mapping configMap is less than the number of replicas,
// create additional empty entries for missing shard numbers in shardMappingDataconfigMap
if len(shardMappingData) < replicas {
// generate extra default mappings
for currentShard := len(shardMappingData); currentShard < replicas; currentShard++ {
shardMappingData = append(shardMappingData, shardApplicationControllerMapping{
ShardNumber: currentShard,
})
}
}
// if current length of shardMappingData in shard mapping configMap is more than the number of replicas,
// we replace the config map with default config map and let controllers self assign the new shard to itself
if len(shardMappingData) > replicas {
shardMappingData = getDefaultShardMappingData(replicas)
}
if shard != -1 && shard < replicas {
log.Debugf("update heartbeat for shard %d", shard)
for i := range shardMappingData {
shardMapping := shardMappingData[i]
if shardMapping.ShardNumber == shard {
log.Debugf("Shard found. Updating heartbeat!!")
shardMapping.ControllerName = hostname
shardMapping.HeartbeatTime = heartbeatCurrentTime()
shardMappingData[i] = shardMapping
break
}
}
} else {
// find the matching shard with assigned controllerName
for i := range shardMappingData {
shardMapping := shardMappingData[i]
if shardMapping.ControllerName == hostname {
log.Debugf("Shard matched. Updating heartbeat!!")
shard = int(shardMapping.ShardNumber)
shardMapping.HeartbeatTime = heartbeatCurrentTime()
shardMappingData[i] = shardMapping
break
}
}
}
// at this point, we have still not found a shard with matching hostname.
// So, find a shard with either no controller assigned or assigned controller
// with heartbeat past threshold
if shard == -1 {
for i := range shardMappingData {
shardMapping := shardMappingData[i]
if (shardMapping.ControllerName == "") || (metav1.Now().After(shardMapping.HeartbeatTime.Add(time.Duration(HeartbeatTimeout) * time.Second))) {
shard = int(shardMapping.ShardNumber)
log.Debugf("Empty shard found %d", shard)
shardMapping.ControllerName = hostname
shardMapping.HeartbeatTime = heartbeatCurrentTime()
shardMappingData[i] = shardMapping
break
}
}
}
return shard, shardMappingData
}
// generateDefaultShardMappingCM creates a default shard mapping configMap. Assigns current controller to shard 0.
func generateDefaultShardMappingCM(namespace, hostname string, replicas, shard int) (*v1.ConfigMap, error) {
shardingCM := &v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: common.ArgoCDAppControllerShardConfigMapName,
Namespace: namespace,
},
Data: map[string]string{},
}
shardMappingData := getDefaultShardMappingData(replicas)
// if shard is not assigned to a controller, we use shard 0
if shard == -1 || shard > replicas {
shard = 0
}
shardMappingData[shard].ControllerName = hostname
shardMappingData[shard].HeartbeatTime = heartbeatCurrentTime()
data, err := json.Marshal(shardMappingData)
if err != nil {
return nil, fmt.Errorf("error generating default ConfigMap: %s", err)
}
shardingCM.Data[ShardControllerMappingKey] = string(data)
return shardingCM, nil
}
func getDefaultShardMappingData(replicas int) []shardApplicationControllerMapping {
shardMappingData := make([]shardApplicationControllerMapping, 0)
for i := 0; i < replicas; i++ {
mapping := shardApplicationControllerMapping{
ShardNumber: i,
}
shardMappingData = append(shardMappingData, mapping)
}
return shardMappingData
}