Skip to content

Commit

Permalink
Short circuit data validation on proper etcd restart.
Browse files Browse the repository at this point in the history
  • Loading branch information
I308301 committed Mar 18, 2019
1 parent 4f203a0 commit e82b5f1
Show file tree
Hide file tree
Showing 16 changed files with 750 additions and 135 deletions.
76 changes: 59 additions & 17 deletions chart/templates/etcd-bootstrap-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,65 @@ metadata:
data:
bootstrap.sh: |-
#!/bin/sh
while true;
do
wget http://localhost:8080/initialization/status -S -O status;
STATUS=`cat status`;
case $STATUS in
"New")
wget http://localhost:8080/initialization/start -S -O - ;;
"Progress")
sleep 1;
continue;;
"Failed")
continue;;
"Successful")
exec etcd --config-file /var/etcd/config/etcd.conf.yaml
;;
esac;
done
# Marker file persisting the exit code of the previous etcd run; its
# presence/content decides how much validation the next start needs.
MARKER=/var/etcd/data/etcdmarker
# Install handlers that forward the given signals to the child PID, so a
# termination of this wrapper also terminates the managed etcd process.
trap_and_propagate() {
    PID=$1
    shift
    for sig in "$@" ; do
        trap "kill -$sig $PID" "$sig"
    done
}
# Start etcd in the background, forward INT/TERM to it, then persist its
# exit code in $MARKER so the next startup can tell a graceful stop apart
# from a crash.
start_managed_etcd(){
    etcd --config-file /bootstrap/etcd.conf.yml &
    ETCDPID=$!
    trap_and_propagate $ETCDPID INT TERM
    wait $ETCDPID
    RET=$?
    echo $RET > $MARKER
    exit $RET
}
# Poll the backup-restore sidecar until data validation (mode passed as
# $1: full|sanity) reports success, then start etcd.
check_and_start_etcd(){
    while true;
    do
        wget http://localhost:8080/initialization/status -S -O status;
        STATUS=`cat status`;
        case $STATUS in
        "New")
            # Quote the URL: '?' is a glob character in sh.
            wget "http://localhost:8080/initialization/start?mode=$1" -S -O - ;;
        "Progress")
            sleep 1;
            continue;;
        "Failed")
            continue;;
        "Successful")
            start_managed_etcd
            break
            ;;
        esac;
    done
}
if [ ! -f $MARKER ] ;
then
    echo "No marker file. Perform complete initialization routine and start etcd."
    check_and_start_etcd full
else
    echo "Marker file present. Check return status and decide on initialization"
    run_status=`cat $MARKER`
    echo "Marker content: $run_status"
    # 0 = clean exit, 130 = SIGINT, 143 = SIGTERM: all graceful stops, so a
    # cheap sanity validation suffices. POSIX sh requires '=' (not '==') in
    # test, and $run_status must be quoted in case the marker is empty.
    if [ "$run_status" = '143' ] || [ "$run_status" = '130' ] || [ "$run_status" = '0' ] ; then
        rm -f $MARKER
        echo "Requesting sidecar to perform sanity validation"
        check_and_start_etcd sanity
    else
        rm -f $MARKER
        echo "Requesting sidecar to perform full validation"
        check_and_start_etcd full
    fi
fi
etcd.conf.yaml: |-
# Human-readable name for this member.
name: etcd-{{.Values.role}}
Expand Down
14 changes: 13 additions & 1 deletion cmd/initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/coreos/etcd/pkg/types"
"github.com/gardener/etcd-backup-restore/pkg/initializer"
"github.com/gardener/etcd-backup-restore/pkg/initializer/validator"
"github.com/gardener/etcd-backup-restore/pkg/snapshot/restorer"
"github.com/gardener/etcd-backup-restore/pkg/snapstore"
"github.com/sirupsen/logrus"
Expand All @@ -36,6 +37,7 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command {
Short: "initialize an etcd instance.",
Long: fmt.Sprintf(`Initializes an etcd instance. Data directory is checked for corruption and restored in case of corruption.`),
Run: func(cmd *cobra.Command, args []string) {
var mode validator.Mode
logger := logrus.New()

clusterUrlsMap, err := types.NewURLsMap(restoreCluster)
Expand All @@ -48,6 +50,15 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command {
logger.Fatalf("failed parsing peers urls for restore cluster: %v", err)
}

switch validator.Mode(validationMode) {
case validator.Full:
mode = validator.Full
case validator.Sanity:
mode = validator.Sanity
default:
logger.Fatal("validation-mode can only be one of these values [full/sanity]")
}

options := &restorer.RestoreOptions{
RestoreDataDir: path.Clean(restoreDataDir),
Name: restoreName,
Expand All @@ -70,13 +81,14 @@ func NewInitializeCommand(stopCh <-chan struct{}) *cobra.Command {
}
}
etcdInitializer := initializer.NewInitializer(options, snapstoreConfig, logger)
err = etcdInitializer.Initialize()
err = etcdInitializer.Initialize(mode)
if err != nil {
logger.Fatalf("initializer failed. %v", err)
}
},
}
initializeEtcdFlags(initializeCmd)
initializeSnapstoreFlags(initializeCmd)
initializeValidatorFlags(initializeCmd)
return initializeCmd
}
5 changes: 5 additions & 0 deletions cmd/miscellaneous.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package cmd
import (
"runtime"

"github.com/gardener/etcd-backup-restore/pkg/initializer/validator"
ver "github.com/gardener/etcd-backup-restore/pkg/version"
"github.com/spf13/cobra"
)
Expand All @@ -36,3 +37,7 @@ func printVersionInfo() {
logger.Infof("Go Version: %s", runtime.Version())
logger.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH)
}

// initializeValidatorFlags registers the data-validation flags on the given
// command. --validation-mode selects how thoroughly the etcd data directory
// is checked before startup; accepted values are "full" and "sanity", with
// full validation as the default.
func initializeValidatorFlags(cmd *cobra.Command) {
	cmd.Flags().StringVar(&validationMode, "validation-mode", string(validator.Full), "mode to do data initialization [full/sanity]")
}
3 changes: 3 additions & 0 deletions cmd/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ var (
storagePrefix string
maxParallelChunkUploads int
snapstoreTempDir string

//initializer flags
validationMode string
)

var emptyStruct struct{}
48 changes: 48 additions & 0 deletions doc/validation.md
Original file line number Diff line number Diff line change
@@ -1 +1,49 @@
# Etcd data validation

As etcd is being used to store the state of the K8s cluster, it is mandatory that etcd deployment has to be hardened against data loss. Sufficient checks have to be in place to prevent etcd from erroneously starting with stale/corrupt data and taking stale snapshots to the backing store. We have a data validation flow in place which prevents etcd from starting in case of data corruption.

## Directory validation
The etcd data directory validation comprises multiple checks, as mentioned below:
### Structure validation
The member directory, snap directory and wal directory are checked to ascertain that they adhere to the directory structure followed by etcd.
### Content validation
#### Corruption check
The contents for the data directory(db file, snap files and wal file) are checked for data corruption.
#### Revision check
The revision of the etcd data in the db file is checked against the revision of the latest snapshot in the backing store. If the revision in the backing store is greater than that of the etcd data in the db file, the etcd data is considered stale. This is to prevent etcd snapshots for stale revisions from overwriting legitimate recent snapshots.

## Validation flow
Not all validation steps take the same time to complete. Some validation steps depend on the size of the etcd data (e.g. the db file). If the db file were checked for data corruption before every etcd startup, it would take longer for etcd to become serviceable. Therefore, it is prudent to perform the expensive validation checks only on abnormal etcd events, such as an etcd restart after a crash. The validation flow mentioned below is modeled with the aforementioned rationale in mind.

* Check if etcd resulted in an abnormal exit. Is it possible to identify previous etcd run status?
* No
* Do directory structure validation.
* Do directory content validation.
* Start etcd
* Yes
* Check if previous exit was normal
* Yes
* Do revision check
* Do directory structure validation.
* Start etcd
* No
* Do directory structure validation.
* Do directory content validation.
* Start etcd

## Additional design decisions to be made
Currently, we have the validation check triggered from a bash script in the etcd container. The status of the validation check is polled till its completed and based on the validation status, it is decided whether it is safe to start etcd. During validation if etcd directory is found to be corrupt or stale, the latest snapshot in the backing store is used to restore etcd data to the latest revision.

### Question 1: Should the sidecar container be able to act on the status of previous etcd run status?

* **Option 1**: Yes. The information of previous etcd run may be made available to the sidecar container via configmaps. The idea is that `validate` REST endpoint shall check the shared configmap for status, perform necessary validation and restore steps before etcd start.

* **Option 2**: No. If the above-mentioned level of granularity is to be available for validation checks, we would need to modify the REST endpoints to trigger the validation sub-checks. Should we modify the bash script to handle the cases and let the sidecar be agnostic to the status of the previous etcd run?

We have chosen the approach where the script decides, based on the previous exit status of etcd, which validation step to call. If etcd terminated normally, then sanity validation is performed; otherwise we perform a full etcd data validation.

### Question 2: How should status for previous etcd run be identified?
* **Option 1**: The error logs of the etcd run can be dumped to an log file in the persistent disk. This can be checked on subsequent validation steps to identify the status of previous etcd run.
* **Option 2**: Via exit code stored in a file in the persistent disk. This can be checked on subsequent validation steps to identify the status of previous etcd run.

Since we do not do an analysis of the logs at this point in time, the log dump and subsequent analysis steps can be taken care of in a follow-up PR.
116 changes: 98 additions & 18 deletions example/etcd-statefulset-aws.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ spec:
protocol: TCP
clusterIP: None
selector:
app: etcd
app: etcd-aws
---
apiVersion: v1
kind: ConfigMap
Expand All @@ -23,23 +23,103 @@ metadata:
data:
bootstrap.sh: |-
#!/bin/sh
while true;
do
wget http://localhost:8080/initialization/status -S -O status;
STATUS=`cat status`;
case $STATUS in
"New")
wget http://localhost:8080/initialization/start -S -O - ;;
"Progress")
sleep 1;
continue;;
"Failed")
continue;;
"Successful")
exec etcd --data-dir=/var/etcd/data/new.etcd --name=etcd --advertise-client-urls=http://0.0.0.0:2379 --listen-client-urls=http://0.0.0.0:2379 --initial-cluster-state=new --initial-cluster-token=new
;;
esac;
done
# Marker file persisting the exit code of the previous etcd run; its
# presence/content decides how much validation the next start needs.
MARKER=/var/etcd/data/etcdmarker
# Install handlers that forward the given signals to the child PID, so a
# termination of this wrapper also terminates the managed etcd process.
trap_and_propagate() {
    PID=$1
    shift
    for sig in "$@" ; do
        trap "kill -$sig $PID" "$sig"
    done
}
# Start etcd in the background, forward INT/TERM to it, then persist its
# exit code in $MARKER so the next startup can tell a graceful stop apart
# from a crash.
start_managed_etcd(){
    etcd --config-file /bootstrap/etcd.conf.yml &
    ETCDPID=$!
    trap_and_propagate $ETCDPID INT TERM
    wait $ETCDPID
    RET=$?
    echo $RET > $MARKER
    exit $RET
}
# Poll the backup-restore sidecar until data validation (mode passed as
# $1: full|sanity) reports success, then start etcd.
check_and_start_etcd(){
    while true;
    do
        wget http://localhost:8080/initialization/status -S -O status;
        STATUS=`cat status`;
        case $STATUS in
        "New")
            # Quote the URL: '?' is a glob character in sh.
            wget "http://localhost:8080/initialization/start?mode=$1" -S -O - ;;
        "Progress")
            sleep 1;
            continue;;
        "Failed")
            continue;;
        "Successful")
            start_managed_etcd
            break
            ;;
        esac;
    done
}
if [ ! -f $MARKER ] ;
then
    echo "No marker file. Perform complete initialization routine and start etcd."
    check_and_start_etcd full
else
    echo "Marker file present. Check return status and decide on initialization"
    run_status=`cat $MARKER`
    echo "Marker content: $run_status"
    # 0 = clean exit, 130 = SIGINT, 143 = SIGTERM: all graceful stops, so a
    # cheap sanity validation suffices. POSIX sh requires '=' (not '==') in
    # test, and $run_status must be quoted in case the marker is empty.
    if [ "$run_status" = '143' ] || [ "$run_status" = '130' ] || [ "$run_status" = '0' ] ; then
        rm -f $MARKER
        echo "Requesting sidecar to perform sanity validation"
        check_and_start_etcd sanity
    else
        rm -f $MARKER
        echo "Requesting sidecar to perform full validation"
        check_and_start_etcd full
    fi
fi
etcd.conf.yml: |-
# This is the configuration file for the etcd server.
# Human-readable name for this member.
name: etcd-new
# Path to the data directory.
data-dir: /var/etcd/data/new.etcd
# List of this member's client URLs to advertise to the public.
# The URLs needed to be a comma-separated list.
advertise-client-urls: http://0.0.0.0:2379
# List of comma separated URLs to listen on for client traffic.
listen-client-urls: http://0.0.0.0:2379
# Initial cluster token for the etcd cluster during bootstrap.
initial-cluster-token: 'new'
# Initial cluster state ('new' or 'existing').
initial-cluster-state: 'new'
# Number of committed transactions to trigger a snapshot to disk.
snapshot-count: 75000
# Raise alarms when backend size exceeds the given quota. 0 means use the
# default quota.
quota-backend-bytes: 8589934592
# Accept etcd V2 client requests
enable-v2: false
# keep one day of history
auto-compaction-mode: periodic
auto-compaction-retention: "24"
---
apiVersion: apps/v1beta1
kind: StatefulSet
Expand Down

0 comments on commit e82b5f1

Please sign in to comment.