Added indexing support for multiple prometheus endpoints (#251)
* added indexing support for multiple prometheus endpoints

* fixed go lint errors (formatting)

* added the same option in the kube-burner cfg file for e2e tests

* added tests and relevant docs

* extending multiple endpoints support to init and ocp commands

* adding additional prometheus for ocp tests

* updated docs and tests for new functionality

* adding a boolean for index vs non-index workflow in scraping metrics

* resolved comments for PR-251 revision2

* fixing golint errors and moving exit code to fix tests

* fixing workflow test for indexing with multiple endpoints case

* fixed the logging nit and improved the if checks for start and end times read from YAML

* resolving conflicts with user metadata changes

* fixing test failures with updated code

---------

Co-authored-by: Vishnu Challa <vchalla@vchalla.remote.csb>
Co-authored-by: Joe Talerico (rook) <joe.talerico@gmail.com>
3 people committed Feb 28, 2023
1 parent 9bac817 commit c5353b3
Showing 20 changed files with 505 additions and 172 deletions.
132 changes: 37 additions & 95 deletions cmd/kube-burner/kube-burner.go
@@ -24,8 +24,8 @@ import (
"github.com/cloud-bulldozer/kube-burner/log"
"github.com/cloud-bulldozer/kube-burner/pkg/alerting"
"github.com/cloud-bulldozer/kube-burner/pkg/burner"
"github.com/cloud-bulldozer/kube-burner/pkg/commons"
"github.com/cloud-bulldozer/kube-burner/pkg/config"
"github.com/cloud-bulldozer/kube-burner/pkg/util"
"github.com/cloud-bulldozer/kube-burner/pkg/version"

"github.com/cloud-bulldozer/kube-burner/pkg/indexers"
@@ -79,13 +79,10 @@ To configure your bash shell to load completions for each session execute:

func initCmd() *cobra.Command {
var err error
var url, metricsProfile, alertProfile, configFile string
var url, metricsEndpoint, metricsProfile, alertProfile, configFile string
var username, password, uuid, token, configMap, namespace, userMetadata string
var skipTLSVerify bool
var prometheusStep time.Duration
var prometheusClient *prometheus.Prometheus
var alertM *alerting.AlertManager
var indexer *indexers.Indexer
var timeout time.Duration
var rc int
cmd := &cobra.Command{
@@ -97,7 +94,6 @@ func initCmd() *cobra.Command {
},
Args: cobra.NoArgs,
Run: func(cmd *cobra.Command, args []string) {
userMetadataContent := make(map[string]interface{})
if configMap != "" {
metricsProfile, alertProfile, err = config.FetchConfigMap(configMap, namespace)
if err != nil {
@@ -106,43 +102,21 @@ func initCmd() *cobra.Command {
// We assume configFile is config.yml
configFile = "config.yml"
}
configSpec, err := config.Parse(configFile, true)
if err != nil {
log.Fatal(err.Error())
}
if url == "" {
url = configSpec.GlobalConfig.PrometheusURL
}
if token == "" {
token = configSpec.GlobalConfig.BearerToken
}
if metricsProfile != "" {
configSpec.GlobalConfig.MetricsProfile = metricsProfile
}
if configSpec.GlobalConfig.IndexerConfig.Enabled {
indexer, err = indexers.NewIndexer(configSpec)
if err != nil {
log.Fatal(err.Error())
}
}
if url != "" {
prometheusClient, err = prometheus.NewPrometheusClient(configSpec, url, token, username, password, uuid, skipTLSVerify, prometheusStep, map[string]interface{}{})
if err != nil {
log.Fatal(err)
}
if alertProfile != "" {
if alertM, err = alerting.NewAlertManager(alertProfile, uuid, configSpec.GlobalConfig.IndexerConfig.DefaultIndex, indexer, prometheusClient); err != nil {
log.Fatalf("Error creating alert manager: %s", err)
}
}
}
if userMetadata != "" {
userMetadataContent, err = util.ReadUserMetadata(userMetadata)
if err != nil {
log.Fatalf("Error reading provided user metadata: %v", err)
}
}
rc, err = burner.Run(configSpec, uuid, prometheusClient, alertM, indexer, timeout, userMetadataContent)
metricsScraper := commons.ProcessMetricsScraperConfig(commons.MetricsScraperConfig{
ConfigFile: configFile,
Password: password,
PrometheusStep: prometheusStep,
MetricsEndpoint: metricsEndpoint,
MetricsProfile: metricsProfile,
AlertProfile: alertProfile,
SkipTLSVerify: skipTLSVerify,
URL: url,
Token: token,
Username: username,
UUID: uuid,
UserMetaData: userMetadata,
})
rc, err = burner.Run(metricsScraper.ConfigSpec, uuid, metricsScraper.PrometheusClients, metricsScraper.AlertMs, metricsScraper.Indexer, timeout, metricsScraper.UserMetadataContent)
if err != nil {
log.Fatalf(err.Error())
}
@@ -154,6 +128,7 @@ func initCmd() *cobra.Command {
cmd.Flags().StringVar(&username, "username", "", "Prometheus username for authentication")
cmd.Flags().StringVarP(&password, "password", "p", "", "Prometheus password for basic authentication")
cmd.Flags().StringVarP(&metricsProfile, "metrics-profile", "m", "", "Metrics profile file or URL")
cmd.Flags().StringVarP(&metricsEndpoint, "metrics-endpoint", "e", "", "YAML file with a list of metric endpoints")
cmd.Flags().StringVarP(&alertProfile, "alert-profile", "a", "", "Alert profile file or URL")
cmd.Flags().BoolVar(&skipTLSVerify, "skip-tls-verify", true, "Verify prometheus TLS certificate")
cmd.Flags().DurationVarP(&prometheusStep, "step", "s", 30*time.Second, "Prometheus step size")
@@ -199,12 +174,11 @@ func destroyCmd() *cobra.Command {
}

func indexCmd() *cobra.Command {
var url, metricsProfile, configFile, jobName string
var url, metricsEndpoint, metricsProfile, configFile, jobName string
var start, end int64
var username, password, uuid, token, userMetadata string
var skipTLSVerify bool
var prometheusStep time.Duration
var indexer *indexers.Indexer
cmd := &cobra.Command{
Use: "index",
Short: "Index kube-burner metrics",
@@ -213,54 +187,23 @@ func indexCmd() *cobra.Command {
log.Info("👋 Exiting kube-burner ", uuid)
},
Run: func(cmd *cobra.Command, args []string) {
userMetadataContent := make(map[string]interface{})
configSpec, err := config.Parse(configFile, false)
if err != nil {
log.Fatal(err.Error())
}
if configSpec.GlobalConfig.IndexerConfig.Enabled {
indexer, err = indexers.NewIndexer(configSpec)
if err != nil {
log.Fatal(err.Error())
}
}
if url == "" {
url = configSpec.GlobalConfig.PrometheusURL
}
if token == "" {
token = configSpec.GlobalConfig.BearerToken
}
if metricsProfile != "" {
configSpec.GlobalConfig.MetricsProfile = metricsProfile
}
if userMetadata != "" {
userMetadataContent, err = util.ReadUserMetadata(userMetadata)
if err != nil {
log.Fatalf("Error reading provided user metadata: %v", err)
}
}
p, err := prometheus.NewPrometheusClient(configSpec, url, token, username, password, uuid, skipTLSVerify, prometheusStep, userMetadataContent)
if err != nil {
log.Fatal(err)
}
startTime := time.Unix(start, 0)
endTime := time.Unix(end, 0)
log.Infof("Indexing metrics with UUID %s", uuid)
p.JobList = []prometheus.Job{{
Start: startTime,
End: endTime,
Name: jobName,
},
}
if err := p.ScrapeJobsMetrics(indexer); err != nil {
log.Error(err)
}
if configSpec.GlobalConfig.WriteToFile && configSpec.GlobalConfig.CreateTarball {
err = prometheus.CreateTarball(configSpec.GlobalConfig.MetricsDirectory)
if err != nil {
log.Fatal(err.Error())
}
}
_ = commons.ProcessMetricsScraperConfig(commons.MetricsScraperConfig{
ConfigFile: configFile,
Password: password,
PrometheusStep: prometheusStep,
MetricsEndpoint: metricsEndpoint,
MetricsProfile: metricsProfile,
SkipTLSVerify: skipTLSVerify,
URL: url,
Token: token,
Username: username,
UUID: uuid,
StartTime: start,
EndTime: end,
JobName: jobName,
ActionIndex: true,
UserMetaData: userMetadata,
})
},
}
cmd.Flags().StringVar(&uuid, "uuid", uid.NewV4().String(), "Benchmark UUID")
@@ -269,16 +212,15 @@ func indexCmd() *cobra.Command {
cmd.Flags().StringVar(&username, "username", "", "Prometheus username for authentication")
cmd.Flags().StringVarP(&password, "password", "p", "", "Prometheus password for basic authentication")
cmd.Flags().StringVarP(&metricsProfile, "metrics-profile", "m", "metrics.yml", "Metrics profile file")
cmd.Flags().StringVarP(&metricsEndpoint, "metrics-endpoint", "e", "", "YAML file with a list of metric endpoints")
cmd.Flags().BoolVar(&skipTLSVerify, "skip-tls-verify", true, "Verify prometheus TLS certificate")
cmd.Flags().DurationVarP(&prometheusStep, "step", "s", 30*time.Second, "Prometheus step size")
cmd.Flags().Int64VarP(&start, "start", "", time.Now().Unix()-3600, "Epoch start time")
cmd.Flags().Int64VarP(&end, "end", "", time.Now().Unix(), "Epoch end time")
cmd.Flags().StringVarP(&configFile, "config", "c", "", "Config file path or URL")
cmd.Flags().StringVarP(&jobName, "job-name", "j", "kube-burner-indexing", "Indexing job name")
cmd.Flags().StringVar(&userMetadata, "user-metadata", "", "User provided metadata file, in YAML format")
cmd.MarkFlagRequired("prometheus-url")
cmd.MarkFlagRequired("config")
cmd.MarkFlagsMutuallyExclusive("prometheus-url", "config")
cmd.Flags().SortFlags = false
return cmd
}
3 changes: 2 additions & 1 deletion cmd/kube-burner/ocp.go
@@ -41,6 +41,7 @@ func openShiftCmd() *cobra.Command {
var wh workloads.WorkloadHelper
esServer := ocpCmd.PersistentFlags().String("es-server", "", "Elastic Search endpoint")
esIndex := ocpCmd.PersistentFlags().String("es-index", "", "Elastic Search index")
metricsEndpoint := ocpCmd.PersistentFlags().String("metrics-endpoint", "", "YAML file with a list of metric endpoints")
alerting := ocpCmd.PersistentFlags().Bool("alerting", true, "Enable alerting")
uuid := ocpCmd.PersistentFlags().String("uuid", uid.NewV4().String(), "Benchmark UUID")
timeout := ocpCmd.PersistentFlags().Duration("timeout", 3*time.Hour, "Benchmark timeout")
@@ -62,7 +63,7 @@ func openShiftCmd() *cobra.Command {
"GC": fmt.Sprintf("%v", *gc),
}
discoveryAgent := discovery.NewDiscoveryAgent()
wh = workloads.NewWorkloadHelper(envVars, *alerting, OCPConfig, discoveryAgent, indexing, *timeout, *userMetadata)
wh = workloads.NewWorkloadHelper(envVars, *alerting, OCPConfig, discoveryAgent, indexing, *timeout, *metricsEndpoint, *userMetadata)
wh.Metadata.UUID = *uuid
if *extract {
if err := wh.ExtractWorkload(cmd.Name(), workloads.MetricsProfileMap[cmd.Name()]); err != nil {
20 changes: 20 additions & 0 deletions docs/cli.md
@@ -39,6 +39,7 @@ This option is meant to run a Kube-burner benchmark, and it supports these flags:
- log-level: Logging level. Default `info`
- prometheus-url: Prometheus full URL. i.e. `https://prometheus-k8s-openshift-monitoring.apps.rsevilla.stress.mycluster.example.com`
- metrics-profile: Path to a valid metrics profile file. Default `metrics.yaml`
- metrics-endpoint: Path to a valid metrics endpoint file.
- token: Prometheus Bearer token.
- username: Prometheus username for basic authentication.
- password: Prometheus password for basic authentication.
@@ -67,6 +68,25 @@ If you have no interest in collecting prometheus metrics, kube-burner can also b
$ kube-burner init -c cfg.yml --uuid 67f9ec6d-6a9e-46b6-a3bb-065cde988790
```

To scrape metrics from multiple endpoints, the init command can be triggered as below.

```console
$ kube-burner init -c cluster-density.yml -e metrics-endpoints.yaml
```

A metrics-endpoints.yaml file with valid keys for the `init` command would look something like this:

```yaml
- endpoint: http://localhost:9090
token: <token>
profile: metrics.yaml
alertProfile: alert-profile.yaml
- endpoint: http://remotehost:9090
token: <token>
```

Note: The `profile` and `alertProfile` options are optional. If not provided, they are taken from the CLI flags first, and otherwise populated with default values. Any keys other than the valid ones are ignored.
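
For example, an alert profile passed on the CLI serves as the fallback for any endpoint entry that omits `alertProfile` (the file names here are illustrative):

```console
$ kube-burner init -c cluster-density.yml -e metrics-endpoints.yaml -a alert-profile.yaml
```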

## Index

This option can be used to collect and index the metrics from a given time range. The time range is given by:
1 change: 1 addition & 0 deletions docs/configuration.md
@@ -27,6 +27,7 @@ In this section is described global job configuration, it holds the following pa
| prometheusURL | Prometheus URL endpoint, flag has precedence | String | <http://prometheus.endpoint.com:9000> | "" |
| bearerToken | Bearer token to access the Prometheus endpoint | String | thisIsAValidToken | "" |
| metricsProfile | Path to the metrics profile configuration file | String | metrics.yml | "" |
| metricsEndpoint | Path to the metrics endpoint configuration file containing a list of target endpoints, flag has precedence | String | metrics-endpoint.yml | "" |
| GC | Garbage collect created namespaces | Boolean | true | false |
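
A minimal `global` section using this parameter might look like the sketch below; the file names are illustrative, and the `--metrics-endpoint` CLI flag takes precedence over this value:

```yaml
global:
  metricsEndpoint: metrics-endpoints.yml
  metricsProfile: metrics.yml
```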

kube-burner connects to the k8s cluster using the following methods in this order:
34 changes: 34 additions & 0 deletions docs/metrics.md
@@ -159,3 +159,37 @@ INFO[2021-06-23 11:39:42] Importing metrics from doc.json
INFO[2021-06-23 11:39:43] Indexing [1] documents in kube-burner
INFO[2021-06-23 11:39:43] Successfully indexed [1] documents in 208ms in kube-burner
```

## Scraping from multiple endpoints

We also have the option to scrape from multiple prometheus endpoints and publish the results to the target indexer under a single `uuid` with the `index` command. For this we can use either the `-e` or the `--metrics-endpoint` option in the CLI. To specify the same from the config file, use the `metricsEndpoint` option.

Note: The CLI flag has precedence over the parameter specified in the config file.

For example:

```console
$ kube-burner index -c config.yaml --job-name=test-job -e metrics-endpoints.yaml
INFO[2023-02-09 19:19:23] 📁 Creating indexer: elastic
INFO[2023-02-09 19:19:24] 👽 Initializing prometheus client
INFO[2023-02-09 19:19:24] Scraping for the prometheus entry with params - {Endpoint:https://prometheus-k8s-openshift-monitoring.apps.vchalla-test.perfscale.devcluster.openshift.com, Profile:/home/vchalla/e2e-benchmarking/workloads/kube-burner/metrics-profiles/metrics.yaml, Start:2023-02-09 18:19:23 -0500 EST, End:2023-02-09 19:19:23 -0500 EST}
INFO[2023-02-09 19:19:24] Indexing metrics with UUID 6ac5fb8a-eca4-4381-b99c-63c1116bfeec
INFO[2023-02-09 19:19:24] 🔍 Scraping prometheus metrics for benchmark from 2023-02-09T18:19:23-05:00 to 2023-02-09T19:19:23-05:00
INFO[2023-02-09 19:19:27] 👽 Initializing prometheus client
INFO[2023-02-09 19:19:27] Scraping for the prometheus entry with params - {Endpoint:https://prometheus-k8s-openshift-monitoring.apps.vchalla-test.perfscale.devcluster.openshift.com, Profile:/home/vchalla/e2e-benchmarking/workloads/kube-burner/metrics-profiles/metrics-ovn.yaml, Start:2023-02-09 18:19:23 -0500 EST, End:2023-02-09 19:19:23 -0500 EST}
INFO[2023-02-09 19:19:27] Indexing metrics with UUID 6ac5fb8a-eca4-4381-b99c-63c1116bfeec
INFO[2023-02-09 19:19:27] 🔍 Scraping prometheus metrics for benchmark from 2023-02-09T18:19:23-05:00 to 2023-02-09T19:19:23-05:00
INFO[2023-02-09 19:23:06] 👋 Exiting kube-burner 6ac5fb8a-eca4-4381-b99c-63c1116bfeec
```

The metrics-endpoints.yaml file with valid keys for the `index` command would look something like this:

```yaml
- endpoint: http://localhost:9090
token: <token>
profile: metrics.yaml
start: 126213213
end: 234324324
- endpoint: http://remotehost:9090
token: <token>
```

Note: The `profile`, `start`, and `end` options are optional. If not provided, they are taken from the CLI flags first, and otherwise populated with default values. Any keys other than the valid ones are ignored.
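
When `start` and `end` are omitted from an endpoint entry, the epoch timestamps passed on the CLI (or their defaults, covering the last hour) are used instead; an illustrative invocation (the timestamps are placeholders):

```console
$ kube-burner index -c config.yaml -e metrics-endpoints.yaml --job-name=test-job --start 1675987200 --end 1675990800
```
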
29 changes: 18 additions & 11 deletions docs/ocp.md
@@ -17,16 +17,17 @@ Available Commands:
node-density-heavy Runs node-density-heavy workload

Flags:
--alerting Enable alerting (default true)
--burst int Burst (default 20)
--es-index string Elastic Search index
--es-server string Elastic Search endpoint
--extract Extract workload in the current directory
--gc Garbage collect created namespaces (default true)
-h, --help help for ocp
--qps int QPS (default 20)
--timeout duration Benchmark timeout (default 3h0m0s)
--uuid string Benchmark UUID (default "a535fd13-3e9d-435a-8d82-0592dc8671c8")
--alerting Enable alerting (default true)
--burst int Burst (default 20)
--es-index string Elastic Search index
--es-server string Elastic Search endpoint
--extract Extract workload in the current directory
--gc Garbage collect created namespaces (default true)
-h, --help help for ocp
--metrics-endpoint string YAML file with a list of metric endpoints
--qps int QPS (default 20)
--timeout duration Benchmark timeout (default 2h0m0s)
--uuid string Benchmark UUID (default "ff60bd1c-df27-4713-be3e-6b92acdd4d72")

Global Flags:
--log-level string Allowed values: trace, debug, info, warn, error, fatal (default "info")
@@ -43,9 +44,15 @@ Running node-density with 100 pods per node

```console
$ kube-burner ocp node-density --pods-per-node=100
```

Running cluster-density with multiple endpoints support

```console
$ kube-burner ocp cluster-density --iterations=1 --churn-duration=2m0s --es-index kube-burner --es-server https://www.esurl.com:443 --metrics-endpoint metrics-endpoints.yaml
```
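
The metrics-endpoints.yaml file referenced above follows the same format as the one used with the `init` command; a minimal sketch (endpoint URLs and tokens are placeholders):

```yaml
- endpoint: https://prometheus-k8s.example.com
  token: <token>
- endpoint: http://localhost:9090
  token: <token>
```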


With the node-density command above, the wrapper will calculate the required number of pods to deploy across all worker nodes of the cluster.

This wrapper provides the following benefits among others:
2 changes: 1 addition & 1 deletion pkg/alerting/alert_manager.go
@@ -82,7 +82,7 @@ type descriptionTemplate struct {

// NewAlertManager creates a new alert manager
func NewAlertManager(alertProfileCfg string, uuid, indexName string, indexer *indexers.Indexer, prometheusClient *prometheus.Prometheus) (*AlertManager, error) {
log.Info("🔔 Initializing alert manager")
log.Infof("🔔 Initializing alert manager for prometheus: %v", prometheusClient.Endpoint)
a := AlertManager{
prometheus: prometheusClient,
uuid: uuid,
