-
Notifications
You must be signed in to change notification settings - Fork 1
/
gpupod_reconciler.go
109 lines (87 loc) · 3.22 KB
/
gpupod_reconciler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/*
Copyright © 2021 The nvidia-gpu-scheduler Authors.
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"context"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/caden2016/nvidia-gpu-scheduler/pkg/util/server/watcher"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
resourcesschedulerv1 "github.com/caden2016/nvidia-gpu-scheduler/api/gpupod/v1"
)
// podReconcilerControllerName is the name string associated with the GpuPod
// reconciler controller.
// NOTE(review): this constant is not referenced in the visible part of this
// file; confirm where (or whether) it is used elsewhere in the package.
const podReconcilerControllerName = "gpupod-reconciler-controller"
// GpuPodReconciler reconciles a GpuPod object.
//
// On every reconcile request it looks the GpuPod up through the embedded
// client and forwards a Synced or Deleted event to the registered GpuPod
// watchers.
type GpuPodReconciler struct {
// Client is embedded so that its methods (notably Get, used to fetch the
// GpuPod named by a request) can be called directly on the reconciler.
client.Client
// Scheme is not read by any method visible in this file.
// NOTE(review): presumably populated by whoever constructs the reconciler; confirm.
Scheme *runtime.Scheme
}
//+kubebuilder:rbac:groups=resources.scheduler.caden2016.github.io,resources=gpupods,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=resources.scheduler.caden2016.github.io,resources=gpupods/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=resources.scheduler.caden2016.github.io,resources=gpupods/finalizers,verbs=update

// Reconcile handles a single request for the GpuPod identified by req.
//
// The GpuPod is looked up via getGpuPod. A lookup failure is logged and
// returned to the caller unchanged. When the object no longer exists, a
// placeholder GpuPod carrying only the request's namespace and name is sent
// to the watchers with a Deleted event; otherwise the fetched object is sent
// with a Synced event. In both non-error cases an empty result and a nil
// error are returned.
func (r *GpuPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	pod, lookupErr := r.getGpuPod(ctx, req)
	if lookupErr != nil {
		// The lookup failed for a reason other than "not found"; hand the
		// error back to the caller.
		klog.Errorf("Failed to get %s, err:%v", req.String(), lookupErr)
		return ctrl.Result{}, lookupErr
	}

	if pod != nil {
		klog.Infof("%s be synced.", req.String())
		notifyWatchersGpuPod(pod, watcher.Synced)
		return ctrl.Result{}, nil
	}

	// A nil object without an error means the GpuPod is gone. Watchers still
	// need to know which object was removed, so build a stand-in that holds
	// only the identifying metadata from the request.
	klog.Infof("%s be deleted.", req.String())
	removed := &resourcesschedulerv1.GpuPod{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: req.Namespace,
			Name:      req.Name,
		},
	}
	notifyWatchersGpuPod(removed, watcher.Deleted)
	return ctrl.Result{}, nil
}
// SetupWithManager registers this reconciler with mgr as a controller for
// GpuPod objects and returns any error reported while completing the
// controller builder.
func (r *GpuPodReconciler) SetupWithManager(mgr ctrl.Manager) error {
	gpuPodController := ctrl.NewControllerManagedBy(mgr).For(&resourcesschedulerv1.GpuPod{})
	return gpuPodController.Complete(r)
}
// getGpuPod fetches the GpuPod named by req.NamespacedName through the
// embedded client, bounding the call with a one-second timeout derived from
// ctx.
//
// Return values:
//   - (object, nil) when the GpuPod was fetched;
//   - (nil, nil) when the client reports the GpuPod as not found;
//   - (nil, err) for any other client error.
func (r *GpuPodReconciler) getGpuPod(ctx context.Context, req ctrl.Request) (gpuPod *resourcesschedulerv1.GpuPod, err error) {
	lookupCtx, cancel := context.WithTimeout(ctx, time.Second)
	defer cancel()

	fetched := &resourcesschedulerv1.GpuPod{}
	getErr := r.Get(lookupCtx, req.NamespacedName, fetched)

	switch {
	case getErr == nil:
		return fetched, nil
	case errors.IsNotFound(getErr):
		// A missing object is reported as "no object, no error" so the
		// caller can tell it apart from a real failure.
		return nil, nil
	default:
		return nil, getErr
	}
}
// notifyWatchersGpuPod wraps gpuPod and etype in a GpuPod event and offers it
// to every watcher channel returned by watcher.GpuPodWatcher.ListWatcher().
//
// Each send is non-blocking: a watcher whose channel cannot accept the event
// right now is skipped, so this function does not block on a send.
func notifyWatchersGpuPod(gpuPod *resourcesschedulerv1.GpuPod, etype watcher.EventType) {
	event := watcher.NewGpuPodEvent(gpuPod, etype)
	subscribers := watcher.GpuPodWatcher.ListWatcher()
	for _, subscriber := range subscribers {
		select {
		case subscriber <- event:
			// Event handed to this watcher.
		default:
			// Channel not ready to receive; drop the event for this watcher.
		}
	}
}