-
Notifications
You must be signed in to change notification settings - Fork 5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Fixes sharding placement algorithm and allows development of alternative algorithms #13018
Changes from all commits
9c7a15b
19f19d8
d4c18c8
d395186
c7ea155
095cb58
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import ( | |
"k8s.io/client-go/kubernetes/fake" | ||
"k8s.io/client-go/rest" | ||
"k8s.io/client-go/tools/clientcmd" | ||
"k8s.io/utils/pointer" | ||
|
||
cmdutil "github.com/argoproj/argo-cd/v2/cmd/util" | ||
"github.com/argoproj/argo-cd/v2/common" | ||
|
@@ -115,10 +116,13 @@ func loadClusters(ctx context.Context, kubeClient *kubernetes.Clientset, appClie | |
} | ||
batch := clustersList.Items[batchStart:batchEnd] | ||
_ = kube.RunAllAsync(len(batch), func(i int) error { | ||
cluster := batch[i] | ||
clusterShard := 0 | ||
cluster := batch[i] | ||
if replicas > 0 { | ||
clusterShard = sharding.GetShardByID(cluster.ID, replicas) | ||
distributionFunction := sharding.GetDistributionFunction(argoDB, common.DefaultShardingAlgorithm) | ||
distributionFunction(&cluster) | ||
cluster.Shard = pointer.Int64Ptr(int64(clusterShard)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
log.Infof("Cluster with uid: %s will be processed by shard %d", cluster.ID, clusterShard) | ||
} | ||
|
||
if shard != -1 && clusterShard != shard { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,14 +41,17 @@ import ( | |
"github.com/argoproj/argo-cd/v2/common" | ||
statecache "github.com/argoproj/argo-cd/v2/controller/cache" | ||
"github.com/argoproj/argo-cd/v2/controller/metrics" | ||
"github.com/argoproj/argo-cd/v2/controller/sharding" | ||
"github.com/argoproj/argo-cd/v2/pkg/apis/application" | ||
appv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" | ||
argov1alpha "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" | ||
appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned" | ||
"github.com/argoproj/argo-cd/v2/pkg/client/informers/externalversions/application/v1alpha1" | ||
applisters "github.com/argoproj/argo-cd/v2/pkg/client/listers/application/v1alpha1" | ||
"github.com/argoproj/argo-cd/v2/reposerver/apiclient" | ||
"github.com/argoproj/argo-cd/v2/util/argo" | ||
argodiff "github.com/argoproj/argo-cd/v2/util/argo/diff" | ||
|
||
appstatecache "github.com/argoproj/argo-cd/v2/util/cache/appstate" | ||
"github.com/argoproj/argo-cd/v2/util/db" | ||
"github.com/argoproj/argo-cd/v2/util/errors" | ||
|
@@ -229,10 +232,12 @@ func (ctrl *ApplicationController) InvalidateProjectsCache(names ...string) { | |
ctrl.projByNameCache.Delete(name) | ||
} | ||
} else { | ||
ctrl.projByNameCache.Range(func(key, _ interface{}) bool { | ||
ctrl.projByNameCache.Delete(key) | ||
return true | ||
}) | ||
if ctrl != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this |
||
ctrl.projByNameCache.Range(func(key, _ interface{}) bool { | ||
ctrl.projByNameCache.Delete(key) | ||
return true | ||
}) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -2010,3 +2015,5 @@ func (ctrl *ApplicationController) toAppKey(appName string) string { | |
func (ctrl *ApplicationController) toAppQualifiedName(appName, appNamespace string) string { | ||
return fmt.Sprintf("%s/%s", appNamespace, appName) | ||
} | ||
|
||
// ClusterFilterFunction is the signature of a predicate over a cluster that also receives the
// sharding.DistributionFunction to consult.
// NOTE(review): presumably returns true when this controller instance should process the cluster — confirm against callers.
type ClusterFilterFunction func(c *argov1alpha.Cluster, distributionFunction sharding.DistributionFunction) bool |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,127 @@ | ||
package sharding | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"hash/fnv" | ||
"math" | ||
"os" | ||
"sort" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/argoproj/argo-cd/v2/common" | ||
"github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" | ||
|
||
"github.com/argoproj/argo-cd/v2/util/db" | ||
"github.com/argoproj/argo-cd/v2/util/env" | ||
log "github.com/sirupsen/logrus" | ||
) | ||
|
||
// Make it overridable for testing | ||
var osHostnameFunction = os.Hostname | ||
|
||
// DistributionFunction computes the shard number for a cluster. The implementations in this
// package return -1 when no shard can be determined.
type DistributionFunction func(c *v1alpha1.Cluster) int | ||
// ClusterFilterFunction is a predicate over a cluster. The one built by GetClusterFilter returns
// true when the cluster resolves to the shard the filter was built for.
type ClusterFilterFunction func(c *v1alpha1.Cluster) bool | ||
|
||
// GetClusterFilter returns a ClusterFilterFunction which is a function taking a cluster as a parameter | ||
// and returns wheter or not the cluster should be processed by a given shard. It calls the distributionFunction | ||
// to determine which shard will process the cluster, and if the given shard is equal to the calculated shard | ||
// the function will return true. | ||
func GetClusterFilter(distributionFunction DistributionFunction, shard int) ClusterFilterFunction { | ||
replicas := env.ParseNumFromEnv(common.EnvControllerReplicas, 0, 0, math.MaxInt32) | ||
return func(c *v1alpha1.Cluster) bool { | ||
clusterShard := 0 | ||
if c != nil && c.Shard != nil { | ||
requestedShard := int(*c.Shard) | ||
if requestedShard < replicas { | ||
clusterShard = requestedShard | ||
} else { | ||
log.Warnf("Specified cluster shard (%d) for cluster: %s is greater than the number of available shard. Assigning automatically.", requestedShard, c.Name) | ||
} | ||
} else { | ||
clusterShard = distributionFunction(c) | ||
} | ||
return clusterShard == shard | ||
} | ||
} | ||
|
||
// GetDistributionFunction returns which DistributionFunction should be used based on the passed algorithm and | ||
// the current datas. | ||
func GetDistributionFunction(db db.ArgoDB, shardingAlgorithm string) DistributionFunction { | ||
log.Infof("Using filter function: %s", shardingAlgorithm) | ||
distributionFunction := LegacyDistributionFunction() | ||
switch shardingAlgorithm { | ||
case common.RoundRobinShardingAlgorithm: | ||
distributionFunction = RoundRobinDistributionFunction(db) | ||
case common.LegacyShardingAlgorithm: | ||
distributionFunction = LegacyDistributionFunction() | ||
default: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
log.Warnf("distribution type %s is not supported, defaulting to %s", shardingAlgorithm, common.DefaultShardingAlgorithm) | ||
} | ||
return distributionFunction | ||
} | ||
|
||
// LegacyDistributionFunction returns a DistributionFunction using a stable distribution algorithm: | ||
// for a given cluster the function will return the shard number based on the cluster id. This function | ||
// is lightweight and can be distributed easily, however, it does not ensure an homogenous distribution as | ||
// some shards may get assigned more clusters than others. It is the legacy function distribution that is | ||
// kept for compatibility reasons | ||
func LegacyDistributionFunction() DistributionFunction { | ||
replicas := env.ParseNumFromEnv(common.EnvControllerReplicas, 0, 0, math.MaxInt32) | ||
return func(c *v1alpha1.Cluster) int { | ||
if replicas == 0 { | ||
return -1 | ||
} | ||
if c == nil { | ||
return 0 | ||
} | ||
id := c.ID | ||
log.Debugf("Calculating cluster shard for cluster id: %s", id) | ||
if id == "" { | ||
return 0 | ||
} else { | ||
h := fnv.New32a() | ||
_, _ = h.Write([]byte(id)) | ||
shard := int32(h.Sum32() % uint32(replicas)) | ||
log.Infof("Cluster with id=%s will be processed by shard %d", id, shard) | ||
return int(shard) | ||
} | ||
} | ||
} | ||
|
||
// RoundRobinDistributionFunction returns a DistributionFunction using an homogeneous distribution algorithm: | ||
// for a given cluster the function will return the shard number based on the modulo of the cluster rank in | ||
// the cluster's list sorted by uid on the shard number. | ||
// This function ensures an homogenous distribution: each shards got assigned the same number of | ||
// clusters +/-1 , but with the drawback of a reshuffling of clusters accross shards in case of some changes | ||
// in the cluster list | ||
func RoundRobinDistributionFunction(db db.ArgoDB) DistributionFunction { | ||
replicas := env.ParseNumFromEnv(common.EnvControllerReplicas, 0, 0, math.MaxInt32) | ||
return func(c *v1alpha1.Cluster) int { | ||
if replicas > 0 { | ||
if c == nil { // in-cluster does not necessarly have a secret assigned. So we are receiving a nil cluster here. | ||
return 0 | ||
} else { | ||
clusterIndexdByClusterIdMap := createClusterIndexByClusterIdMap(db) | ||
clusterIndex, ok := clusterIndexdByClusterIdMap[c.ID] | ||
if !ok { | ||
log.Warnf("Cluster with id=%s not found in cluster map.", c.ID) | ||
return -1 | ||
} | ||
shard := int(clusterIndex % replicas) | ||
log.Infof("Cluster with id=%s will be processed by shard %d", c.ID, shard) | ||
return shard | ||
} | ||
} | ||
log.Warnf("The number of replicas (%d) is lower than 1", replicas) | ||
return -1 | ||
} | ||
} | ||
|
||
// InferShard extracts the shard index based on its hostname. | ||
func InferShard() (int, error) { | ||
hostname, err := os.Hostname() | ||
hostname, err := osHostnameFunction() | ||
if err != nil { | ||
return 0, err | ||
} | ||
|
@@ -23,31 +133,32 @@ func InferShard() (int, error) { | |
if err != nil { | ||
return 0, fmt.Errorf("hostname should ends with shard number separated by '-' but got: %s", hostname) | ||
} | ||
return shard, nil | ||
return int(shard), nil | ||
} | ||
|
||
// GetShardByID calculates cluster shard as `clusterSecret.UID % replicas count` | ||
func GetShardByID(id string, replicas int) int { | ||
if id == "" { | ||
return 0 | ||
} else { | ||
h := fnv.New32a() | ||
_, _ = h.Write([]byte(id)) | ||
return int(h.Sum32() % uint32(replicas)) | ||
func getSortedClustersList(db db.ArgoDB) []v1alpha1.Cluster { | ||
ctx := context.Background() | ||
clustersList, dbErr := db.ListClusters(ctx) | ||
if dbErr != nil { | ||
log.Warnf("Error while querying clusters list from database: %v", dbErr) | ||
akram marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return []v1alpha1.Cluster{} | ||
} | ||
clusters := clustersList.Items | ||
sort.Slice(clusters, func(i, j int) bool { | ||
return clusters[i].ID < clusters[j].ID | ||
}) | ||
return clusters | ||
} | ||
|
||
func GetClusterFilter(replicas int, shard int) func(c *v1alpha1.Cluster) bool { | ||
return func(c *v1alpha1.Cluster) bool { | ||
clusterShard := 0 | ||
// cluster might be nil if app is using invalid cluster URL, assume shard 0 in this case. | ||
if c != nil { | ||
if c.Shard != nil { | ||
clusterShard = int(*c.Shard) | ||
} else { | ||
clusterShard = GetShardByID(c.ID, replicas) | ||
} | ||
} | ||
return clusterShard == shard | ||
func createClusterIndexByClusterIdMap(db db.ArgoDB) map[string]int { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add some comments for the newly added functions ? |
||
clusters := getSortedClustersList(db) | ||
log.Debugf("ClustersList has %d items", len(clusters)) | ||
clusterById := make(map[string]v1alpha1.Cluster) | ||
clusterIndexedByClusterId := make(map[string]int) | ||
for i, cluster := range clusters { | ||
log.Debugf("Adding cluster with id=%s and name=%s to cluster's map", cluster.ID, cluster.Name) | ||
clusterById[cluster.ID] = cluster | ||
clusterIndexedByClusterId[cluster.ID] = i | ||
} | ||
return clusterIndexedByClusterId | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this line be `clusterShard = distributionFunction(&cluster)`? As written, the value returned by `distributionFunction(&cluster)` is discarded.