Skip to content

Commit

Permalink
the first commit for ensurance, add node-qos-controller, collect, ana…
Browse files Browse the repository at this point in the history
…lyzer and avoidance
  • Loading branch information
yan234280533 committed Nov 25, 2021
1 parent dfb9a2f commit f103c04
Show file tree
Hide file tree
Showing 36 changed files with 2,380 additions and 143 deletions.
161 changes: 161 additions & 0 deletions cmd/crane-agent/app/manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package app

import (
"context"
"flag"
"fmt"
"os"

"github.com/spf13/cobra"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"

ensuaranceapi "github.com/gocrane/api/ensurance/v1alpha1"
ensuaranceset "github.com/gocrane/api/pkg/generated/clientset/versioned"
"github.com/gocrane-io/crane/cmd/crane-agent/app/options"
ensurancecontroller "github.com/gocrane-io/crane/pkg/controller/ensurance"
"github.com/gocrane-io/crane/pkg/ensurance/analyzer"
"github.com/gocrane-io/crane/pkg/ensurance/avoidance"
"github.com/gocrane-io/crane/pkg/ensurance/executor"
einformer "github.com/gocrane-io/crane/pkg/ensurance/informer"
"github.com/gocrane-io/crane/pkg/ensurance/manager"
"github.com/gocrane-io/crane/pkg/ensurance/nep"
"github.com/gocrane-io/crane/pkg/ensurance/statestore"
"github.com/gocrane-io/crane/pkg/utils/clogs"
)

var (
scheme = runtime.NewScheme()
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(ensuaranceapi.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

// NewManagerCommand creates a *cobra.Command object with default parameters
func NewManagerCommand(ctx context.Context) *cobra.Command {
opts := options.NewOptions()

cmd := &cobra.Command{
Use: "crane-agent",
Long: `The crane agent is responsible agent in crane`,
Run: func(cmd *cobra.Command, args []string) {
if err := opts.Complete(); err != nil {
clogs.Log().Error(err, "opts complete failed,exit")
os.Exit(255)
}
if err := opts.Validate(); err != nil {
clogs.Log().Error(err, "opts validate failed,exit")
os.Exit(255)
}

if err := Run(ctx, opts); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
},
}

cmd.Flags().AddGoFlagSet(flag.CommandLine)
opts.AddFlags(cmd.Flags())
return cmd
}

// Run runs the crane-agent with options. This should never exit.
func Run(ctx context.Context, opts *options.Options) error {
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
MetricsBindAddress: opts.MetricsAddr,
HealthProbeBindAddress: opts.BindAddr,
Port: int(opts.WebhookPort),
Host: opts.WebhookHost,
LeaderElection: false,
})
if err != nil {
clogs.Log().Error(err, "unable to start crane agent")
os.Exit(1)
}

if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil {
clogs.Log().Error(err, "failed to add health check endpoint")
return err
}

clogs.Log().Info(fmt.Sprintf("opts %v", opts))

// init context
ec := initializationContext(mgr, opts)
ec.Run()

// init components
components := initializationComponents(mgr, opts, ec)

// start managers
for _, v := range components {
clogs.Log().Info("Starting manager %s", v.Name())
v.Run(ec.GetStopChannel())
}

clogs.Log().Info("Starting crane agent")
if err := mgr.Start(ctx); err != nil {
clogs.Log().Error(err, "problem running crane manager")
return err
}

return nil
}

func initializationComponents(mgr ctrl.Manager, opts *options.Options, ec *einformer.Context) []manager.Manager {
clogs.Log().Info(fmt.Sprintf("initializationComponents"))

var managers []manager.Manager
podInformer := ec.GetPodFactory().Core().V1().Pods().Informer()
nodeInformer := ec.GetNodeFactory().Core().V1().Nodes().Informer()
nepInformer := ec.GetAvoidanceFactory().Ensurance().V1alpha1().NodeQOSEnsurancePolicies().Informer()
avoidanceInformer := ec.GetAvoidanceFactory().Ensurance().V1alpha1().AvoidanceActions().Informer()

// init state store manager
stateStoreManager := statestore.NewStateStoreManager()
managers = append(managers, stateStoreManager)

// init analyzer manager
analyzerManager := analyzer.NewAnalyzerManager(podInformer, nodeInformer, avoidanceInformer, nepInformer, noticeCh)
managers = append(managers, analyzerManager)

// init avoidance manager
var noticeCh = make(chan executor.AvoidanceExecutorStruct)
avoidanceManager := avoidance.NewAvoidanceManager(ec.GetKubeClient(), opts.HostnameOverride, podInformer, nodeInformer, avoidanceInformer, noticeCh)
managers = append(managers, avoidanceManager)

// init nep controller
nepRecorder := mgr.GetEventRecorderFor("node-qos-controller")
if err := (&ensurancecontroller.NodeQOSEnsurancePolicyController{
Client: mgr.GetClient(),
Log: clogs.Log().WithName("node-qos-controller"),
Scheme: mgr.GetScheme(),
RestMapper: mgr.GetRESTMapper(),
Recorder: nepRecorder,
Cache: &nep.NodeQOSEnsurancePolicyCache{},
StateStore: stateStoreManager,
}).SetupWithManager(mgr); err != nil {
clogs.Log().Error(err, "unable to create controller", "controller", "NodeQOSEnsurancePolicyController")
os.Exit(1)
}

return managers
}

func initializationContext(mgr ctrl.Manager, opts *options.Options) *einformer.Context {
clogs.Log().Info(fmt.Sprintf("initializationContext"))

generatedClient := kubernetes.NewForConfigOrDie(mgr.GetConfig())
clientSet := ensuaranceset.NewForConfigOrDie(mgr.GetConfig())

return einformer.NewContextInitWithClient(generatedClient, clientSet, opts.HostnameOverride)
}
43 changes: 43 additions & 0 deletions cmd/crane-agent/app/options/option.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package options

import (
"github.com/spf13/pflag"
)

// Options hold the command-line options about crane manager
type Options struct {
// MetricsAddr is the address the metric endpoint binds to.
MetricsAddr string
// BindAddr is the address the probe endpoint binds to.
BindAddr string
// WebhookHost is the address webhook binds to.
WebhookHost string
// WebhookPort is the port webhook binds to.
WebhookPort uint64
// HostnameOverride is the name of k8s node
HostnameOverride string
}

// NewOptions builds an empty options.
func NewOptions() *Options {
return &Options{}
}

// Complete completes all the required options.
func (o *Options) Complete() error {
return nil
}

// Validate all required options.
func (o *Options) Validate() error {
return nil
}

// AddFlags adds flags to the specified FlagSet.
func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.StringVar(&o.MetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flags.StringVar(&o.BindAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flags.StringVar(&o.WebhookHost, "webhook-host", "0.0.0.0", "The address webhook binds to.")
flags.Uint64Var(&o.WebhookPort, "webhook-port", 9443, "The port webhook binds to.")
flags.StringVar(&o.HostnameOverride, "hostname-override", o.HostnameOverride, "which is the name of k8s node be used to filtered.")
}
27 changes: 27 additions & 0 deletions cmd/crane-agent/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package main

import (
"fmt"
"os"

genericapiserver "k8s.io/apiserver/pkg/server"
"k8s.io/component-base/logs"

"github.com/gocrane-io/crane/cmd/crane-agent/app"
"github.com/gocrane-io/crane/pkg/utils/clogs"
)

// crane-agent main.
func main() {
logs.InitLogs()
defer logs.FlushLogs()

clogs.InitLogs("crane-manager")

ctx := genericapiserver.SetupSignalContext()

if err := app.NewManagerCommand(ctx).Execute(); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
}
35 changes: 35 additions & 0 deletions deploy/deploy/crane-agent/daemonset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
app: crane-agent
name: crane-agent
namespace: crane-system
spec:
revisionHistoryLimit: 10
selector:
matchLabels:
app: crane-agent
template:
metadata:
labels:
app: crane-agent
spec:
containers:
- env:
- name: NODE_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: spec.nodeName
image: docker.io/gocrane/crane-agent:v0.0.1
imagePullPolicy: Always
name: crane-agent
resources:
limits:
cpu: 100m
memory: 100Mi
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: crane-agent
65 changes: 65 additions & 0 deletions deploy/deploy/crane-agent/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: crane-agent
rules:
- apiGroups:
- ""
resources:
- pods/status
- pods
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- ""
resources:
- nodeqosensurancepolicies.ensurance.crane.io
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- ""
resources:
- podqosensurancepolicies.ensurance.crane.io
verbs:
- get
- list
- watch
- update
- patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: crane-agent
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: crane-agent
subjects:
- kind: ServiceAccount
name: crane-agent
namespace: crane-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: crane-agent
namespace: crane-system
25 changes: 25 additions & 0 deletions examples/test/lowforevict.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: greedy
description: Priority for low level services.
value: -100

---

apiVersion: v1
kind: Pod
metadata:
name: low
spec:
nodeSelector:
group: "s-000001"
containers:
- image: docker.io/gocrane/stress-ng:v0.12.09
imagePullPolicy: IfNotPresent
name: low
command:
- /bin/bash
- -c
- "sleep 36000"
priorityClassName: greedy
Loading

0 comments on commit f103c04

Please sign in to comment.