forked from karmada-io/karmada
Commit
reschedule bindings on cluster change
Signed-off-by: dddddai <dddwq@foxmail.com>
Showing 5 changed files with 182 additions and 32 deletions.
@@ -33,7 +33,7 @@ func (s *Snapshot) GetClusters() []*framework.ClusterInfo {
 func (s *Snapshot) GetReadyClusters() []*framework.ClusterInfo {
     var readyClusterInfoList []*framework.ClusterInfo
     for _, c := range s.clusterInfoList {
-        if util.IsClusterReady(&c.Cluster().Status) {
+        if util.IsClusterReady(&c.Cluster().Status) && c.Cluster().DeletionTimestamp.IsZero() {
             readyClusterInfoList = append(readyClusterInfoList, c)
         }
     }

@@ -45,7 +45,7 @@ func (s *Snapshot) GetReadyClusters() []*framework.ClusterInfo {
 func (s *Snapshot) GetReadyClusterNames() sets.String {
     readyClusterNames := sets.NewString()
     for _, c := range s.clusterInfoList {
-        if util.IsClusterReady(&c.Cluster().Status) {
+        if util.IsClusterReady(&c.Cluster().Status) && c.Cluster().DeletionTimestamp.IsZero() {
             readyClusterNames.Insert(c.Cluster().Name)
         }
     }
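Both hunks apply the same predicate: a cluster only counts as ready for scheduling when its Ready condition is true and it carries no deletion timestamp, so a cluster that is being removed stops being picked by the scheduler. A minimal standalone sketch of that predicate; the isSchedulable helper, the package name, and the cluster API import path are illustrative assumptions, not part of this commit:

package snapshotutil

import (
    "k8s.io/apimachinery/pkg/api/meta"

    clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
)

// isSchedulable mirrors the condition added above: the cluster must report the
// Ready condition as true and must not be marked for deletion.
func isSchedulable(cluster *clusterv1alpha1.Cluster) bool {
    ready := meta.IsStatusConditionTrue(cluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady)
    terminating := !cluster.DeletionTimestamp.IsZero()
    return ready && !terminating
}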
@@ -12,6 +12,7 @@ import (
     "k8s.io/apimachinery/pkg/api/meta"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
+    "k8s.io/apimachinery/pkg/util/errors"
     "k8s.io/apimachinery/pkg/util/wait"
     "k8s.io/client-go/dynamic"
     "k8s.io/client-go/kubernetes"
@@ -31,9 +32,11 @@ import (
     worklister "github.com/karmada-io/karmada/pkg/generated/listers/work/v1alpha2"
     schedulercache "github.com/karmada-io/karmada/pkg/scheduler/cache"
     "github.com/karmada-io/karmada/pkg/scheduler/core"
+    "github.com/karmada-io/karmada/pkg/scheduler/framework"
     "github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/apiinstalled"
     "github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/clusteraffinity"
     "github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/tainttoleration"
+    "github.com/karmada-io/karmada/pkg/scheduler/framework/runtime"
     "github.com/karmada-io/karmada/pkg/scheduler/metrics"
     "github.com/karmada-io/karmada/pkg/util"
     "github.com/karmada-io/karmada/pkg/util/helper"
@@ -89,10 +92,12 @@ type Scheduler struct {
     informerFactory informerfactory.SharedInformerFactory

     // TODO: implement a priority scheduling queue
-    queue workqueue.RateLimitingInterface
+    queue        workqueue.RateLimitingInterface
+    clusterQueue workqueue.RateLimitingInterface

-    Algorithm      core.ScheduleAlgorithm
-    schedulerCache schedulercache.Cache
+    Algorithm          core.ScheduleAlgorithm
+    schedulerFramework framework.Framework
+    schedulerCache     schedulercache.Cache

     enableSchedulerEstimator bool
     schedulerEstimatorCache  *estimatorclient.SchedulerEstimatorCache
@@ -113,9 +118,11 @@ func NewScheduler(dynamicClient dynamic.Interface, karmadaClient karmadaclientse
     clusterPolicyLister := factory.Policy().V1alpha1().ClusterPropagationPolicies().Lister()
     clusterLister := factory.Cluster().V1alpha1().Clusters().Lister()
     queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
+    clusterQueue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
     schedulerCache := schedulercache.NewCache(clusterLister)
     // TODO: make plugins as a flag
-    algorithm := core.NewGenericScheduler(schedulerCache, policyLister, []string{clusteraffinity.Name, tainttoleration.Name, apiinstalled.Name})
+    schedulerFramework := runtime.NewFramework([]string{clusteraffinity.Name, tainttoleration.Name, apiinstalled.Name})
+    algorithm := core.NewGenericScheduler(schedulerCache, policyLister, schedulerFramework)
     sched := &Scheduler{
         DynamicClient: dynamicClient,
         KarmadaClient: karmadaClient,
@@ -131,7 +138,9 @@ func NewScheduler(dynamicClient dynamic.Interface, karmadaClient karmadaclientse
         clusterLister:            clusterLister,
         informerFactory:          factory,
         queue:                    queue,
+        clusterQueue:             clusterQueue,
         Algorithm:                algorithm,
+        schedulerFramework:       schedulerFramework,
         schedulerCache:           schedulerCache,
         enableSchedulerEstimator: opts.EnableSchedulerEstimator,
     }
@@ -193,6 +202,7 @@ func (s *Scheduler) Run(ctx context.Context) {
     }

     go wait.Until(s.worker, time.Second, stopCh)
+    go wait.Until(s.rescheduleOnClusterChange, 0, stopCh)

     <-stopCh
 }
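The second goroutine runs with a re-invocation period of 0, which is safe because rescheduleOnClusterChange blocks on clusterQueue.Get(): wait.Until effectively becomes an ordinary workqueue worker loop that handles the next cluster as soon as the previous one is done. A rough sketch of that pattern, assuming only the client-go workqueue and apimachinery wait packages (runWorker and its parameters are illustrative, not part of the commit):

package workerutil

import (
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/client-go/util/workqueue"
)

// runWorker drains a rate-limited queue until stopCh closes. Get() blocks while
// the queue is empty, so a zero period in wait.Until does not busy-loop; the
// caller is expected to ShutDown() the queue together with closing stopCh.
func runWorker(queue workqueue.RateLimitingInterface, process func(key string) error, stopCh <-chan struct{}) {
    wait.Until(func() {
        key, shutdown := queue.Get()
        if shutdown {
            return
        }
        defer queue.Done(key)
        if err := process(key.(string)); err != nil {
            queue.AddRateLimited(key) // retry with backoff
            return
        }
        queue.Forget(key) // clear rate-limiting history on success
    }, 0, stopCh)
}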
@@ -267,6 +277,28 @@ func (s *Scheduler) worker() {
     }
 }

+func (s *Scheduler) rescheduleOnClusterChange() {
+    key, shutdown := s.clusterQueue.Get()
+    if shutdown {
+        klog.Errorf("Failed to pop item from clusterQueue")
+        return
+    }
+    defer s.clusterQueue.Done(key)
+
+    clusterName := key.(string)
+    cluster, err := s.clusterLister.Get(clusterName)
+    if err != nil {
+        if apierrors.IsNotFound(err) {
+            return
+        }
+        klog.Errorf("Failed to get cluster %q: %v", clusterName, err)
+        s.clusterQueue.AddRateLimited(key)
+        return
+    }
+    if err = s.rescheduleBindingsForCluster(cluster); err != nil {
+        klog.Errorf("Failed to reschedule bindings for cluster %q: %v", clusterName, err)
+        s.clusterQueue.AddRateLimited(key)
+    }
+}
+
 // requeueResourceBindings will retrieve all ResourceBinding objects by the label selector and put them to queue.
 func (s *Scheduler) requeueResourceBindings(selector labels.Selector) error {
     referenceBindings, err := s.bindingLister.List(selector)
@@ -533,6 +565,10 @@ func (s *Scheduler) addCluster(obj interface{}) {
     }
     klog.V(3).Infof("add event for cluster %s", cluster.Name)

+    if meta.IsStatusConditionTrue(cluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady) {
+        s.clusterQueue.AddRateLimited(cluster.Name)
+    }
+
     if s.enableSchedulerEstimator {
         s.schedulerEstimatorWorker.AddRateLimited(cluster.Name)
     }
@@ -551,14 +587,18 @@ func (s *Scheduler) updateCluster(_, newObj interface{}) {
     }

     // Check if cluster becomes failure
-    if meta.IsStatusConditionPresentAndEqual(newCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) {
+    if meta.IsStatusConditionFalse(newCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady) ||
+        !newCluster.DeletionTimestamp.IsZero() {
         klog.Infof("Found cluster(%s) failure and failover flag is %v", newCluster.Name, Failover)

         if Failover { // Trigger reschedule on cluster failure only when flag is true.
             s.enqueueAffectedBinding(newCluster.Name)
             s.enqueueAffectedClusterBinding(newCluster.Name)
             return
         }
+    } else if meta.IsStatusConditionTrue(newCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady) {
+        // reschedule bindings since cluster updated
+        s.clusterQueue.AddRateLimited(newCluster.Name)
     }
 }
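The updated handler now distinguishes two situations: a cluster whose Ready condition is false or that carries a deletion timestamp triggers failover of the bindings already scheduled to it (only when the Failover flag is set), while a cluster that reports Ready after the update is pushed onto clusterQueue so existing bindings can be re-evaluated against it. A compact, hypothetical reduction of that branching (the function and its parameters are illustrative only; a Ready condition that is neither true nor false results in no action, as in the real handler):

// onClusterUpdate captures the decision made in updateCluster above.
func onClusterUpdate(readyFalse, readyTrue, terminating, failoverEnabled bool, enqueueFailover, enqueueReschedule func()) {
    if readyFalse || terminating {
        // unhealthy or being deleted: move work off the cluster when allowed
        if failoverEnabled {
            enqueueFailover()
        }
        return
    }
    if readyTrue {
        // healthy after the update: reconsider bindings against this cluster
        enqueueReschedule()
    }
}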
@@ -670,7 +710,7 @@ func (s *Scheduler) rescheduleClusterResourceBinding(clusterResourceBinding *wor
         return err
     }

-    reScheduleResult, err := s.Algorithm.FailoverSchedule(context.TODO(), &policy.Spec.Placement, &clusterResourceBinding.Spec)
+    reScheduleResult, err := s.Algorithm.ReSchedule(context.TODO(), &policy.Spec.Placement, &clusterResourceBinding.Spec)
     if err != nil {
         return err
     }
@@ -695,7 +735,7 @@ func (s *Scheduler) rescheduleResourceBinding(resourceBinding *workv1alpha2.Reso
         return err
     }

-    reScheduleResult, err := s.Algorithm.FailoverSchedule(context.TODO(), &placement, &resourceBinding.Spec)
+    reScheduleResult, err := s.Algorithm.ReSchedule(context.TODO(), &placement, &resourceBinding.Spec)
     if err != nil {
         return err
     }
@@ -869,17 +909,20 @@ func (s *Scheduler) getTypeFromClusterResourceBindings(name string) ScheduleType

 func (s *Scheduler) allClustersInReadyState(tcs []workv1alpha2.TargetCluster) bool {
     clusters := s.schedulerCache.Snapshot().GetClusters()
+    count := 0
     for i := range tcs {
         for _, c := range clusters {
             if c.Cluster().Name == tcs[i].Name {
-                if meta.IsStatusConditionPresentAndEqual(c.Cluster().Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) {
+                if meta.IsStatusConditionPresentAndEqual(c.Cluster().Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) ||
+                    !c.Cluster().DeletionTimestamp.IsZero() {
                     return false
                 }
+                count++
                 continue
             }
         }
     }
-    return true
+    return count == len(tcs)
 }

 func (s *Scheduler) reconcileEstimatorConnection(key util.QueueKey) error {
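The counter changes the function's semantics: previously a target cluster that was missing from the cache snapshot was silently skipped and the loop still returned true, whereas now every scheduled target must both be found in the snapshot and pass the health check. A set-based sketch of the same idea; allTargetsHealthy, its package, and the work API import path are illustrative assumptions:

package readiness

import (
    "k8s.io/apimachinery/pkg/util/sets"

    workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
)

// allTargetsHealthy mirrors the updated count == len(tcs) check: every target
// cluster recorded on the binding must appear in the set of healthy clusters.
func allTargetsHealthy(targets []workv1alpha2.TargetCluster, healthy sets.String) bool {
    for _, t := range targets {
        if !healthy.Has(t.Name) {
            return false
        }
    }
    return true
}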
@@ -954,3 +997,84 @@ func (s *Scheduler) updateClusterBindingStatusIfNeeded(crb *workv1alpha2.Cluster
     }
     return nil
 }
+
+func (s *Scheduler) rescheduleBindingsForCluster(cluster *clusterv1alpha1.Cluster) error {
+    rbs, err := s.bindingLister.List(labels.Everything())
+    if err != nil {
+        klog.Errorf("Failed to list all resource bindings: %v", err)
+        return err
+    }
+    var errs []error
+    for _, rb := range rbs {
+        placement, _, err := s.getPlacement(rb)
+        if err != nil {
+            klog.Errorf("Failed to get placement of ResourceBinding(%s/%s): %v", rb.Namespace, rb.Name, err)
+            return err
+        }
+        if s.needReschedule(cluster, &rb.Spec, placement) {
+            key, err := cache.MetaNamespaceKeyFunc(rb)
+            if err != nil {
+                klog.Errorf("Failed to get key of ResourceBinding(%s/%s): %v", rb.Namespace, rb.Name, err)
+                return err
+            }
+            if err = s.rescheduleOne(key); err != nil {
+                errs = append(errs, err)
+            }
+        }
+    }
+
+    crbs, err := s.clusterBindingLister.List(labels.Everything())
+    if err != nil {
+        klog.Errorf("Failed to list all cluster resource bindings: %v", err)
+        return err
+    }
+    for _, crb := range crbs {
+        policyName := util.GetLabelValue(crb.Labels, policyv1alpha1.ClusterPropagationPolicyLabel)
+        policy, err := s.clusterPolicyLister.Get(policyName)
+        if err != nil {
+            klog.Errorf("Failed to get policy of ClusterResourceBinding(%s): %v", crb.Name, err)
+            return err
+        }
+        if s.needReschedule(cluster, &crb.Spec, policy.Spec.Placement) {
+            key, err := cache.MetaNamespaceKeyFunc(crb)
+            if err != nil {
+                klog.Errorf("Failed to get key of ClusterResourceBinding(%s): %v", crb.Name, err)
+                return err
+            }
+            if err = s.rescheduleOne(key); err != nil {
+                errs = append(errs, err)
+            }
+        }
+    }
+    return errors.NewAggregate(errs)
+}
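One property the aggregated return value relies on: errors.NewAggregate from k8s.io/apimachinery/pkg/util/errors returns nil for an empty slice, so the cluster key is only requeued by rescheduleOnClusterChange when at least one binding actually failed to reschedule. A standalone illustration:

package main

import (
    "fmt"

    utilerrors "k8s.io/apimachinery/pkg/util/errors"
)

func main() {
    var errs []error
    // No per-binding errors were collected: the aggregate is nil, i.e. success.
    fmt.Println(utilerrors.NewAggregate(errs) == nil) // true

    errs = append(errs, fmt.Errorf("reschedule failed"))
    // At least one failure: the aggregate is non-nil and triggers a requeue.
    fmt.Println(utilerrors.NewAggregate(errs) == nil) // false
}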
+
+func (s *Scheduler) needReschedule(cluster *clusterv1alpha1.Cluster, spec *workv1alpha2.ResourceBindingSpec, placement policyv1alpha1.Placement) bool {
+    // There are 2 cases in which a binding needs rescheduling:
+    // 1. The cluster fits the placement but is NOT in binding.spec.clusters (and spread constraints still allow adding it).
+    // 2. The cluster does NOT fit the placement but is in binding.spec.clusters.
+    bound := false
+    for _, bindingCluster := range spec.Clusters {
+        if cluster.Name == bindingCluster.Name {
+            bound = true
+            break
+        }
+    }
+    if bound {
+        if !s.clusterFitsPlacement(cluster, placement, &spec.Resource) {
+            // case 2: the cluster does NOT fit the placement but is in binding.spec.clusters
+            return true
+        }
+    } else if s.clusterFitsPlacement(cluster, placement, &spec.Resource) && helper.CheckSpreadConstraints(spec, &placement, cluster.Name) {
+        // case 1: the cluster fits the placement but is NOT in binding.spec.clusters (no more than spread-constraint MaxGroups)
+        return true
+    }
+    return false
+}
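Stripped of the lookups, needReschedule reduces to a two-input decision. The hypothetical helper below (not part of the commit) makes the two cases from the comment explicit:

// decideReschedule mirrors needReschedule once the cluster's membership in
// spec.Clusters and its placement fit have been computed.
func decideReschedule(boundToCluster, fitsPlacement bool) bool {
    if boundToCluster {
        // case 2: already scheduled to the cluster, but it no longer fits
        return !fitsPlacement
    }
    // case 1: not scheduled there yet, but it now fits
    // (subject to the spread-constraint check in the real code)
    return fitsPlacement
}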
+
+func (s *Scheduler) clusterFitsPlacement(
+    cluster *clusterv1alpha1.Cluster,
+    placement policyv1alpha1.Placement,
+    resource *workv1alpha2.ObjectReference) bool {
+    return s.schedulerFramework.RunFilterPlugins(context.TODO(), &placement, resource, cluster).Merge().IsSuccess()
+}
Review comment: "Do you mean !c.Cluster().DeletionTimestamp.IsZero()?"