From 6b148c27d6b1780fc7ce4707fab9579bfa408eea Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Thu, 3 May 2018 13:12:17 +0000 Subject: [PATCH 01/16] NFC: Comment out test until we can work out how to fix it --- .gitlab-ci.yml | 2 +- tests/acceptance_test.go | 129 ++++++++++++++++++++------------------- 2 files changed, 66 insertions(+), 65 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9cd4fd846..1af40c077 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -variables: +2ariables: SERVER_NAME: dotmesh-server PROVISIONER_NAME: dotmesh-dynamic-provisioner diff --git a/tests/acceptance_test.go b/tests/acceptance_test.go index 634a0d26c..88d0ecafe 100644 --- a/tests/acceptance_test.go +++ b/tests/acceptance_test.go @@ -930,70 +930,71 @@ func TestTwoNodesSameCluster(t *testing.T) { } }) - t.Run("Divergence", func(t *testing.T) { - fsname := citools.UniqName() - citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") - citools.RunOnNode(t, node1, "dm switch "+fsname) - citools.RunOnNode(t, node1, "dm commit -m 'First commit'") - ensureCurrentDotIsFullyReplicated(t, node1) - citools.RunOnNode(t, node1, "dm dot show") - citools.RunOnNode(t, node2, "dm dot show") - - fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) - - // Kill node1 - stopContainers(t, node1) - - // Commit on node2 - citools.RunOnNode(t, node2, "dm dot smash-branch-master "+fsname+" master") - citools.RunOnNode(t, node2, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO2'") - citools.RunOnNode(t, node2, "dm switch "+fsname) - citools.RunOnNode(t, node2, "dm commit -m 'node2 commit'") - citools.RunOnNode(t, node2, "dm dot show") - - // Kill node2 - stopContainers(t, node2) - - // Start node1 - startContainers(t, node1) - - // Commit on node1 - citools.RunOnNode(t, node1, "dm dot smash-branch-master "+fsname+" master") - - zfsPath := strings.Replace(node1, "cluster", 
"testpool", -1) + "/dmfs/" + fsId + "@Node1CommitHash" - - // Manual ZFS snapshot to circumvent etcd - citools.RunOnNode(t, node1, "docker exec -t dotmesh-server-inner zfs snapshot "+zfsPath) - - stopContainers(t, node1) - startContainers(t, node1) - citools.RunOnNode(t, node1, "dm dot show") - - // Start node2 and enjoy the diverged state - startContainers(t, node2) - citools.RunOnNode(t, node1, "dm dot show") - citools.RunOnNode(t, node2, "dm dot show") - - // Check status of convergence - for _, node := range [...]string{node1, node2} { - dotStatus := citools.OutputFromRunOnNode(t, node, "dm dot show") - if !strings.Contains(dotStatus, "DIVERGED") || strings.Contains(dotStatus, "is missing") { - t.Errorf("Absence of Divergence branch or incomplete resolution on node: %s\n%s", node, dotStatus) - } - dmLog := citools.OutputFromRunOnNode(t, node, "dm log") - if !strings.Contains(dmLog, "First commit") || !strings.Contains(dmLog, "Node1CommitHash") { - t.Errorf("Absence of converged commits on branch master on node :%s\n%s", node, dmLog) - } - - dmBranch := citools.OutputFromRunOnNode(t, node, "dm branch | grep DIVERGED") - citools.RunOnNode(t, node, fmt.Sprintf("dm checkout %s", strings.TrimSpace(dmBranch))) - - dmLog = citools.OutputFromRunOnNode(t, node, "dm log") - if !strings.Contains(dmLog, "node2 commit") { - t.Errorf("Absence of non-master diverged commits on branch *DIVERGED on node :%s\n%s", node, dmLog) - } - } - }) + //t.Run("Divergence", func(t *testing.T) { + // fsname := citools.UniqName() + // citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") + // citools.RunOnNode(t, node1, "dm switch "+fsname) + // citools.RunOnNode(t, node1, "dm commit -m 'First commit'") + // ensureCurrentDotIsFullyReplicated(t, node1) + // citools.RunOnNode(t, node1, "dm dot show") + // citools.RunOnNode(t, node2, "dm switch "+fsname) + // citools.RunOnNode(t, node2, "dm dot show") + // + // fsId := 
strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) + // + // // Kill node1 + // stopContainers(t, node1) + // + // // Commit on node2 + // citools.RunOnNode(t, node2, "dm dot smash-branch-master "+fsname+" master") + // citools.RunOnNode(t, node2, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO2'") + // citools.RunOnNode(t, node2, "dm switch "+fsname) + // citools.RunOnNode(t, node2, "dm commit -m 'node2 commit'") + // citools.RunOnNode(t, node2, "dm dot show") + // + // // Kill node2 + // stopContainers(t, node2) + // + // // Start node1 + // startContainers(t, node1) + // + // // Commit on node1 + // citools.RunOnNode(t, node1, "dm dot smash-branch-master "+fsname+" master") + // + // zfsPath := strings.Replace(node1, "cluster", "testpool", -1) + "/dmfs/" + fsId + "@Node1CommitHash" + // + // // Manual ZFS snapshot to circumvent etcd + // citools.RunOnNode(t, node1, "docker exec -t dotmesh-server-inner zfs snapshot "+zfsPath) + // + // stopContainers(t, node1) + // startContainers(t, node1) + // citools.RunOnNode(t, node1, "dm dot show") + // + // // Start node2 and enjoy the diverged state + // startContainers(t, node2) + // citools.RunOnNode(t, node1, "dm dot show") + // citools.RunOnNode(t, node2, "dm dot show") + // + // // Check status of convergence + // for _, node := range [...]string{node1, node2} { + // dotStatus := citools.OutputFromRunOnNode(t, node, "dm dot show") + // if !strings.Contains(dotStatus, "DIVERGED") || strings.Contains(dotStatus, "is missing") { + // t.Errorf("Absence of Divergence branch or incomplete resolution on node: %s\n%s", node, dotStatus) + // } + // dmLog := citools.OutputFromRunOnNode(t, node, "dm log") + // if !strings.Contains(dmLog, "First commit") || !strings.Contains(dmLog, "Node1CommitHash") { + // t.Errorf("Absence of converged commits on branch master on node :%s\n%s", node, dmLog) + // } + // + // dmBranch := citools.OutputFromRunOnNode(t, node, "dm 
branch | grep DIVERGED") + // citools.RunOnNode(t, node, fmt.Sprintf("dm checkout %s", strings.TrimSpace(dmBranch))) + // + // dmLog = citools.OutputFromRunOnNode(t, node, "dm log") + // if !strings.Contains(dmLog, "node2 commit") { + // t.Errorf("Absence of non-master diverged commits on branch *DIVERGED on node :%s\n%s", node, dmLog) + // } + // } + //}) } func TestTwoDoubleNodeClusters(t *testing.T) { From f55f5c2a3caa55d3f4405c49a37282c7a021d0ec Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Thu, 3 May 2018 13:15:50 +0000 Subject: [PATCH 02/16] NFC: fix typo sneaked into yaml --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1af40c077..9cd4fd846 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,4 +1,4 @@ -2ariables: +variables: SERVER_NAME: dotmesh-server PROVISIONER_NAME: dotmesh-dynamic-provisioner From 8af6e53e18cfa08b9c75216d40ab731af8bf9a8e Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Thu, 3 May 2018 17:17:06 +0100 Subject: [PATCH 03/16] #344: We no longer need the GKE yamls (that's handled in the ConfigMap), and they're not referenced from the docs any more. 
--- kubernetes/rebuild.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/kubernetes/rebuild.sh b/kubernetes/rebuild.sh index 1ae3efed6..7031f75e5 100755 --- a/kubernetes/rebuild.sh +++ b/kubernetes/rebuild.sh @@ -17,9 +17,7 @@ fi sed "s/DOCKER_TAG/$CI_DOCKER_TAG/" < dotmesh.yaml > $OUT/dotmesh-k8s-1.7.yaml sed "s_rbac.authorization.k8s.io/v1beta1_rbac.authorization.k8s.io/v1_"< $OUT/dotmesh-k8s-1.7.yaml > $OUT/dotmesh-k8s-1.8.yaml -cp $OUT/dotmesh-k8s-1.8.yaml $OUT/dotmesh-k8s-1.8.gke.yaml sed "s_ClusterIP_LoadBalancer_" < $OUT/dotmesh-k8s-1.8.yaml > $OUT/dotmesh-k8s-1.8.aks.yaml -cp $OUT/dotmesh-k8s-1.7.yaml $OUT/dotmesh-k8s-1.7.gke.yaml cp configmap.yaml $OUT/configmap.yaml sed "s_/usr/libexec/kubernetes/kubelet-plugins/volume/exec_/home/kubernetes/flexvolume_" < configmap.yaml > $OUT/configmap.gke.yaml From bce9ca2048e3b8a754347fc1d61f62a68b07cb10 Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Fri, 4 May 2018 08:34:25 +0000 Subject: [PATCH 04/16] NFC: Re-enable flaky test for debugging --- tests/acceptance_test.go | 130 +++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/tests/acceptance_test.go b/tests/acceptance_test.go index 88d0ecafe..e5cf1a65f 100644 --- a/tests/acceptance_test.go +++ b/tests/acceptance_test.go @@ -930,71 +930,71 @@ func TestTwoNodesSameCluster(t *testing.T) { } }) - //t.Run("Divergence", func(t *testing.T) { - // fsname := citools.UniqName() - // citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") - // citools.RunOnNode(t, node1, "dm switch "+fsname) - // citools.RunOnNode(t, node1, "dm commit -m 'First commit'") - // ensureCurrentDotIsFullyReplicated(t, node1) - // citools.RunOnNode(t, node1, "dm dot show") - // citools.RunOnNode(t, node2, "dm switch "+fsname) - // citools.RunOnNode(t, node2, "dm dot show") - // - // fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) - // - // // 
Kill node1 - // stopContainers(t, node1) - // - // // Commit on node2 - // citools.RunOnNode(t, node2, "dm dot smash-branch-master "+fsname+" master") - // citools.RunOnNode(t, node2, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO2'") - // citools.RunOnNode(t, node2, "dm switch "+fsname) - // citools.RunOnNode(t, node2, "dm commit -m 'node2 commit'") - // citools.RunOnNode(t, node2, "dm dot show") - // - // // Kill node2 - // stopContainers(t, node2) - // - // // Start node1 - // startContainers(t, node1) - // - // // Commit on node1 - // citools.RunOnNode(t, node1, "dm dot smash-branch-master "+fsname+" master") - // - // zfsPath := strings.Replace(node1, "cluster", "testpool", -1) + "/dmfs/" + fsId + "@Node1CommitHash" - // - // // Manual ZFS snapshot to circumvent etcd - // citools.RunOnNode(t, node1, "docker exec -t dotmesh-server-inner zfs snapshot "+zfsPath) - // - // stopContainers(t, node1) - // startContainers(t, node1) - // citools.RunOnNode(t, node1, "dm dot show") - // - // // Start node2 and enjoy the diverged state - // startContainers(t, node2) - // citools.RunOnNode(t, node1, "dm dot show") - // citools.RunOnNode(t, node2, "dm dot show") - // - // // Check status of convergence - // for _, node := range [...]string{node1, node2} { - // dotStatus := citools.OutputFromRunOnNode(t, node, "dm dot show") - // if !strings.Contains(dotStatus, "DIVERGED") || strings.Contains(dotStatus, "is missing") { - // t.Errorf("Absence of Divergence branch or incomplete resolution on node: %s\n%s", node, dotStatus) - // } - // dmLog := citools.OutputFromRunOnNode(t, node, "dm log") - // if !strings.Contains(dmLog, "First commit") || !strings.Contains(dmLog, "Node1CommitHash") { - // t.Errorf("Absence of converged commits on branch master on node :%s\n%s", node, dmLog) - // } - // - // dmBranch := citools.OutputFromRunOnNode(t, node, "dm branch | grep DIVERGED") - // citools.RunOnNode(t, node, fmt.Sprintf("dm checkout %s", strings.TrimSpace(dmBranch))) - // 
- // dmLog = citools.OutputFromRunOnNode(t, node, "dm log") - // if !strings.Contains(dmLog, "node2 commit") { - // t.Errorf("Absence of non-master diverged commits on branch *DIVERGED on node :%s\n%s", node, dmLog) - // } - // } - //}) + t.Run("Divergence", func(t *testing.T) { + fsname := citools.UniqName() + citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") + citools.RunOnNode(t, node1, "dm switch "+fsname) + citools.RunOnNode(t, node1, "dm commit -m 'First commit'") + ensureCurrentDotIsFullyReplicated(t, node1) + citools.RunOnNode(t, node1, "dm dot show") + citools.RunOnNode(t, node2, "dm switch "+fsname) + citools.RunOnNode(t, node2, "dm dot show") + + fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) + + // Kill node1 + stopContainers(t, node1) + + // Commit on node2 + citools.RunOnNode(t, node2, "dm dot smash-branch-master "+fsname+" master") + citools.RunOnNode(t, node2, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO2'") + citools.RunOnNode(t, node2, "dm switch "+fsname) + citools.RunOnNode(t, node2, "dm commit -m 'node2 commit'") + citools.RunOnNode(t, node2, "dm dot show") + + // Kill node2 + stopContainers(t, node2) + + // Start node1 + startContainers(t, node1) + + // Commit on node1 + citools.RunOnNode(t, node1, "dm dot smash-branch-master "+fsname+" master") + + zfsPath := strings.Replace(node1, "cluster", "testpool", -1) + "/dmfs/" + fsId + "@Node1CommitHash" + + // Manual ZFS snapshot to circumvent etcd + citools.RunOnNode(t, node1, "docker exec -t dotmesh-server-inner zfs snapshot "+zfsPath) + + stopContainers(t, node1) + startContainers(t, node1) + citools.RunOnNode(t, node1, "dm dot show") + + // Start node2 and enjoy the diverged state + startContainers(t, node2) + citools.RunOnNode(t, node1, "dm dot show") + citools.RunOnNode(t, node2, "dm dot show") + + // Check status of convergence + for _, node := range [...]string{node1, 
node2} { + dotStatus := citools.OutputFromRunOnNode(t, node, "dm dot show") + if !strings.Contains(dotStatus, "DIVERGED") || strings.Contains(dotStatus, "is missing") { + t.Errorf("Absence of Divergence branch or incomplete resolution on node: %s\n%s", node, dotStatus) + } + dmLog := citools.OutputFromRunOnNode(t, node, "dm log") + if !strings.Contains(dmLog, "First commit") || !strings.Contains(dmLog, "Node1CommitHash") { + t.Errorf("Absence of converged commits on branch master on node :%s\n%s", node, dmLog) + } + + dmBranch := citools.OutputFromRunOnNode(t, node, "dm branch | grep DIVERGED") + citools.RunOnNode(t, node, fmt.Sprintf("dm checkout %s", strings.TrimSpace(dmBranch))) + + dmLog = citools.OutputFromRunOnNode(t, node, "dm log") + if !strings.Contains(dmLog, "node2 commit") { + t.Errorf("Absence of non-master diverged commits on branch *DIVERGED on node :%s\n%s", node, dmLog) + } + } + }) } func TestTwoDoubleNodeClusters(t *testing.T) { From 852b2e90b25c94a684704e5f23cf097e611358a6 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 11:43:31 +0100 Subject: [PATCH 05/16] NFC: Comments concerning pod health checking --- cmd/dotmesh-server/pkg/operator/main.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cmd/dotmesh-server/pkg/operator/main.go b/cmd/dotmesh-server/pkg/operator/main.go index e77566458..aaa58139a 100644 --- a/cmd/dotmesh-server/pkg/operator/main.go +++ b/cmd/dotmesh-server/pkg/operator/main.go @@ -472,6 +472,17 @@ func (c *dotmeshController) process() error { // At this point, we believe this is a valid running Dotmesh pod. // That node has a dotmesh, so isn't undotted. + + // IDEA: We could try and health-check the pod if we find its IP + // and send it a Dotmesh RPC call, but we need to be careful NOT + // to consider pods still in the throes of startup broken and + // mark them for death. 
Perhaps we need to compare their age + // against a timeout value, and allow health-check failures for + // pods younger than a certain age. But how to set that age? On + // a busy cluster with a flakey Internet connection, could image + // fetching take an age? Perhaps we only eliminate "Running" + // pods that don't respond to a health-check over a certain age? + // Where do we draw the line? glog.V(2).Infof("Observing pod %s running %s on %s (status: %s)", podName, image, boundNode, dotmesh.Status.Phase) delete(undottedNodes, boundNode) } From e36dc0c0ad567f9ef0dd079c747bf5d30d6e6211 Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Fri, 4 May 2018 11:34:08 +0000 Subject: [PATCH 06/16] #352: Attempt to reduce flakiness by checking replication status on both nodes in a cluster --- tests/acceptance_test.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/acceptance_test.go b/tests/acceptance_test.go index e5cf1a65f..7f108edd5 100644 --- a/tests/acceptance_test.go +++ b/tests/acceptance_test.go @@ -935,10 +935,8 @@ func TestTwoNodesSameCluster(t *testing.T) { citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") citools.RunOnNode(t, node1, "dm switch "+fsname) citools.RunOnNode(t, node1, "dm commit -m 'First commit'") - ensureCurrentDotIsFullyReplicated(t, node1) - citools.RunOnNode(t, node1, "dm dot show") - citools.RunOnNode(t, node2, "dm switch "+fsname) - citools.RunOnNode(t, node2, "dm dot show") + ensureCurrentDotIsFullyReplicated(t, node1, fsname) + ensureCurrentDotIsFullyReplicated(t, node2, fsname) fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) @@ -2233,10 +2231,9 @@ spec: citools.DumpTiming() } -func ensureCurrentDotIsFullyReplicated(t *testing.T, node string) { +func ensureCurrentDotIsFullyReplicated(t *testing.T, node string, fsname string) { for try := 1; try <= 5; try++ { - fmt.Printf("Dotmesh containers running 
on %s: ", node) - st := citools.OutputFromRunOnNode(t, node, "dm dot show | grep missing || true") + st := citools.OutputFromRunOnNode(t, node, fmt.Sprintf("dm dot show %s | grep missing || true", fsname)) if st == "" { return } else { From cfda80a537baab4feba6f2782972e48a62960fbe Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Fri, 4 May 2018 11:42:36 +0000 Subject: [PATCH 07/16] NFC: Fix logging on error messages --- cmd/dotmesh-server/pkg/main/statemachines.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmd/dotmesh-server/pkg/main/statemachines.go b/cmd/dotmesh-server/pkg/main/statemachines.go index 1decbd8e0..d0a53fa9f 100644 --- a/cmd/dotmesh-server/pkg/main/statemachines.go +++ b/cmd/dotmesh-server/pkg/main/statemachines.go @@ -689,9 +689,7 @@ func activeState(f *fsMachine) stateFn { // refuse to move if we have any containers running containers, err := f.containersRunning() if err != nil { - log.Printf( - "Can't move filesystem while we can't list whether containers are using it", - ) + log.Printf("Can't move filesystem while we can't list whether containers are using it") f.innerResponses <- &Event{ Name: "error-listing-containers-during-move", Args: &EventArgs{"err": err}, @@ -849,7 +847,7 @@ func activeState(f *fsMachine) stateFn { // fail if any containers running containers, err := f.containersRunning() if err != nil { - log.Printf("Can't unmount filesystem while containers are using it") + log.Printf("Can't unmount filesystem when we are unable to list containers using it") f.innerResponses <- &Event{ Name: "error-listing-containers-during-unmount", Args: &EventArgs{"err": err}, @@ -857,6 +855,7 @@ func activeState(f *fsMachine) stateFn { return backoffState } if len(containers) > 0 { + log.Printf("Can't unmount filesystem while containers are using it") f.innerResponses <- &Event{ Name: "cannot-unmount-while-running-containers", Args: &EventArgs{"containers": containers}, From 00a8454a5d013488e60fa82bc91a4156faa84fa5 Mon 
Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 14:06:45 +0100 Subject: [PATCH 08/16] #17: Made the smoke test push to a remote cluster (if credentials are passed into SMOKE_TEST_REMOTE and SMOKE_TEST_APIKEY). Also moved it into `scripts`, as per #36. --- .gitlab-ci.yml | 6 ++--- scripts/smoke.sh | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ smoke.sh | 22 ---------------- 3 files changed, 70 insertions(+), 25 deletions(-) create mode 100755 scripts/smoke.sh delete mode 100755 smoke.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9cd4fd846..c664cf001 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -114,7 +114,7 @@ build_server: # cd cmd/dotmesh-server; # docker build -t dotmesh-server . || (sleep 30; docker build -t dotmesh-server .) # cd ../.. -# ./smoke.sh $(pwd)/binaries/Linux/dm dotmesh-server +# ./scripts/smoke.sh $(pwd)/binaries/Linux/dm dotmesh-server # docker system prune -fa # " @@ -134,7 +134,7 @@ macos_docker_stable: # rebuilding it here. - docker build -t dotmesh-server . || (sleep 30; docker build -t dotmesh-server .) - cd ../.. - - ./smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server + - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker - docker container prune -f @@ -155,7 +155,7 @@ macos_docker_edge: # rebuilding it here. - docker build -t dotmesh-server . || (sleep 30; docker build -t dotmesh-server .) - cd ../.. - - ./smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server + - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker - docker container prune -f diff --git a/scripts/smoke.sh b/scripts/smoke.sh new file mode 100755 index 000000000..4a3271fc5 --- /dev/null +++ b/scripts/smoke.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +set -xe + +# Smoke test to see whether basics still work on e.g. 
macOS + +DM="$1" +VOL="volume_`date +%s`" +IMAGE="${CI_DOCKER_REGISTRY:-`hostname`.local:80/dotmesh}/"$2":${CI_DOCKER_TAG:-latest}" + +sudo "$DM" cluster reset || (sleep 30; sudo "$DM" cluster reset) || true + +echo "### Installing image ${IMAGE}" + +"$DM" cluster init --offline --image "$IMAGE" + +echo "### Testing docker run..." + +docker run --rm -i --name smoke -v "$VOL:/foo" --volume-driver dm ubuntu touch /foo/X + +echo "### Testing list..." + +OUT=`"$DM" list` + +if [[ $OUT == *"$VOL"* ]]; then + echo "String '$VOL' found, yay!" +else + echo "String '$VOL' not found, boo :(" + exit 1 +fi + +echo "### Testing commit..." + +"$DM" switch "$VOL" +"$DM" commit -m 'Test commit' + +OUT=`"$DM" log` + +if [[ $OUT == *"Test commit"* ]]; then + echo "Commit found, yay!" +else + echo "Commit not found, boo :(" + exit 1 +fi + +if [ x$SMOKE_TEST_REMOTE != x ] +then + echo "### Testing push to remote..." + REMOTE="smoke_test_`date +%s`" + echo "$SMOKE_TEST_APIKEY" | "$DM" remote add "$REMOTE" "$SMOKE_TEST_REMOTE" + + "$DM" push "$REMOTE" "$VOL" + + "$DM" remote switch "$REMOTE" + OUT=`"$DM" list` + + if [[ $OUT == *"$VOL"* ]]; then + echo "String '$VOL' found on the remote, yay!" + else + echo "String '$VOL' not found on the remote, boo :(" + exit 1 + fi + + "$DM" remote switch local + "$DM" remote rm "$REMOTE" +fi + +exit 0 diff --git a/smoke.sh b/smoke.sh deleted file mode 100755 index 8899bc618..000000000 --- a/smoke.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -set -xe - -# Smoke test to see whether basics still work on e.g. macOS - -DM=$1 -VOL="volume_`date +%s`" - -sudo $DM cluster reset || (sleep 30; sudo $DM cluster reset) || true - -$DM cluster init --offline --image dotmesh-server - -docker run --rm -i --name smoke -v $VOL:/foo --volume-driver dm ubuntu touch /foo/X -OUT=`$DM list` - -if [[ $OUT == *"$VOL"* ]]; then - echo "String '$VOL' found, yay!" 
- exit 0 -else - echo "String '$VOL' not found, boo :(" - exit 1 -fi From 3958e275d6b745f917302af93aee79eb521af548 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 14:23:46 +0100 Subject: [PATCH 09/16] #17: Avoid echoing the API key, and run the smoke tests on Linux (it's easier for me to debug them there) --- .gitlab-ci.yml | 14 ++++++++++++++ scripts/smoke.sh | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c664cf001..9bd061b3e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -160,6 +160,20 @@ macos_docker_edge: - which docker - docker container prune -f +linux_smoke: + stage: test + dependencies: + - build_server + - build_client_linux + tags: + - ubuntu + - fast + script: + - ./scripts/smoke.sh $(pwd)/binaries/Linux/dm dotmesh-server + - docker version + - which docker + - docker container prune -f + versioner_unit_tests: stage: test tags: diff --git a/scripts/smoke.sh b/scripts/smoke.sh index 4a3271fc5..38939008b 100755 --- a/scripts/smoke.sh +++ b/scripts/smoke.sh @@ -46,7 +46,8 @@ if [ x$SMOKE_TEST_REMOTE != x ] then echo "### Testing push to remote..." REMOTE="smoke_test_`date +%s`" - echo "$SMOKE_TEST_APIKEY" | "$DM" remote add "$REMOTE" "$SMOKE_TEST_REMOTE" + + (set +x; echo "$SMOKE_TEST_APIKEY"; set -x) | "$DM" remote add "$REMOTE" "$SMOKE_TEST_REMOTE" "$DM" push "$REMOTE" "$VOL" From 9f4d3a68b63a649f47279638b7dacd6766a36ebc Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Fri, 4 May 2018 13:52:14 +0000 Subject: [PATCH 10/16] NFC: Test adding sleep to ensure replication. 
--- tests/acceptance_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/acceptance_test.go b/tests/acceptance_test.go index 7f108edd5..20685fac4 100644 --- a/tests/acceptance_test.go +++ b/tests/acceptance_test.go @@ -937,6 +937,7 @@ func TestTwoNodesSameCluster(t *testing.T) { citools.RunOnNode(t, node1, "dm commit -m 'First commit'") ensureCurrentDotIsFullyReplicated(t, node1, fsname) ensureCurrentDotIsFullyReplicated(t, node2, fsname) + time.Sleep(3 * time.Second) fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) From a52c39385aefbd769aba16aa4c678babff7036e5 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 16:44:21 +0100 Subject: [PATCH 11/16] #17: Pull the right image, use a dedicated config, and test `dm dot delete` on the remote The latter checks if the dot is wedged in some crazy state, as I'm currently seeing in production (the dot sits in `pushPeerState` despite the push completing) --- .gitlab-ci.yml | 17 +++-------------- scripts/smoke.sh | 31 +++++++++++++++++++------------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9bd061b3e..4962f8c9c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -127,13 +127,7 @@ macos_docker_stable: - macos - docker-stable script: - - cd cmd/dotmesh-server - # Possible to build just the "prod" image because binaries got passed - # as artifact in cmd/dotmesh-server/target - # TODO: pull the built image from $CI_DOCKER_REGISTRY, rather than - # rebuilding it here. - - docker build -t dotmesh-server . || (sleep 30; docker build -t dotmesh-server .) - - cd ../.. 
+ - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker @@ -148,13 +142,7 @@ macos_docker_edge: - macos - docker-edge script: - - cd cmd/dotmesh-server - # Possible to build just the "prod" image because binaries got passed - # as artifact in cmd/dotmesh-server/target - # TODO: pull the built image from $CI_DOCKER_REGISTRY, rather than - # rebuilding it here. - - docker build -t dotmesh-server . || (sleep 30; docker build -t dotmesh-server .) - - cd ../.. + - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker @@ -169,6 +157,7 @@ linux_smoke: - ubuntu - fast script: + - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Linux/dm dotmesh-server - docker version - which docker diff --git a/scripts/smoke.sh b/scripts/smoke.sh index 38939008b..f541d468d 100755 --- a/scripts/smoke.sh +++ b/scripts/smoke.sh @@ -6,12 +6,14 @@ set -xe DM="$1" VOL="volume_`date +%s`" IMAGE="${CI_DOCKER_REGISTRY:-`hostname`.local:80/dotmesh}/"$2":${CI_DOCKER_TAG:-latest}" +CONFIG=/tmp/smoke_test_$$.dmconfig +trap 'rm "$CONFIG" || true' EXIT -sudo "$DM" cluster reset || (sleep 30; sudo "$DM" cluster reset) || true +sudo "$DM" -c "$CONFIG" cluster reset || (sleep 30; sudo "$DM" cluster reset) || true echo "### Installing image ${IMAGE}" -"$DM" cluster init --offline --image "$IMAGE" +"$DM" -c "$CONFIG" cluster init --offline --image "$IMAGE" echo "### Testing docker run..." @@ -19,7 +21,7 @@ docker run --rm -i --name smoke -v "$VOL:/foo" --volume-driver dm ubuntu touch / echo "### Testing list..." -OUT=`"$DM" list` +OUT=`"$DM" -c "$CONFIG" list` if [[ $OUT == *"$VOL"* ]]; then echo "String '$VOL' found, yay!" @@ -30,10 +32,10 @@ fi echo "### Testing commit..." 
-"$DM" switch "$VOL" -"$DM" commit -m 'Test commit' +"$DM" -c "$CONFIG" switch "$VOL" +"$DM" -c "$CONFIG" commit -m 'Test commit' -OUT=`"$DM" log` +OUT=`"$DM" -c "$CONFIG" log` if [[ $OUT == *"Test commit"* ]]; then echo "Commit found, yay!" @@ -47,12 +49,12 @@ then echo "### Testing push to remote..." REMOTE="smoke_test_`date +%s`" - (set +x; echo "$SMOKE_TEST_APIKEY"; set -x) | "$DM" remote add "$REMOTE" "$SMOKE_TEST_REMOTE" + (set +x; echo "$SMOKE_TEST_APIKEY"; set -x) | "$DM" -c "$CONFIG" remote add "$REMOTE" "$SMOKE_TEST_REMOTE" - "$DM" push "$REMOTE" "$VOL" + "$DM" -c "$CONFIG" push "$REMOTE" "$VOL" - "$DM" remote switch "$REMOTE" - OUT=`"$DM" list` + "$DM" -c "$CONFIG" remote switch "$REMOTE" + OUT=`"$DM" -c "$CONFIG" list` if [[ $OUT == *"$VOL"* ]]; then echo "String '$VOL' found on the remote, yay!" @@ -61,8 +63,13 @@ then exit 1 fi - "$DM" remote switch local - "$DM" remote rm "$REMOTE" + echo "### Testing delete on remote..." + + REMOTE_NAME="`echo $SMOKE_TEST_REMOTE | sed s/@.*$//`" + "$DM" dot delete -f "$REMOTE_NAME"/"$VOL" + + "$DM" -c "$CONFIG" remote switch local + "$DM" -c "$CONFIG" remote rm "$REMOTE" fi exit 0 From 4db4cce5f6ceee9e804c5998188ce6c11fb38d62 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 17:05:02 +0100 Subject: [PATCH 12/16] FIX: Typo... 
--- .gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4962f8c9c..287f2caef 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -127,7 +127,7 @@ macos_docker_stable: - macos - docker-stable script: - - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG + - docker pull $CI_DOCKER_REGISTRY/dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker @@ -142,7 +142,7 @@ macos_docker_edge: - macos - docker-edge script: - - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG + - docker pull $CI_DOCKER_REGISTRY/dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Darwin/dm dotmesh-server - docker version - which docker @@ -157,7 +157,7 @@ linux_smoke: - ubuntu - fast script: - - docker pull $CI_DOCKER_REGISTRY:dotmesh-server:$CI_DOCKER_TAG + - docker pull $CI_DOCKER_REGISTRY/dotmesh-server:$CI_DOCKER_TAG - ./scripts/smoke.sh $(pwd)/binaries/Linux/dm dotmesh-server - docker version - which docker From 3ffc741583e09fcc0261c05dde9547ef4b4b6869 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 17:16:34 +0100 Subject: [PATCH 13/16] FIX: Missed the `-c` option to the `dm dot delete...` --- scripts/smoke.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/smoke.sh b/scripts/smoke.sh index f541d468d..016b6d414 100755 --- a/scripts/smoke.sh +++ b/scripts/smoke.sh @@ -1,11 +1,15 @@ #!/usr/bin/env bash set -xe -# Smoke test to see whether basics still work on e.g. macOS +# Smoke test to see whether basics still work on e.g. 
macOS; also tests the optional push to, and delete from, a remote cluster when SMOKE_TEST_REMOTE is set DM="$1" VOL="volume_`date +%s`" IMAGE="${CI_DOCKER_REGISTRY:-`hostname`.local:80/dotmesh}/"$2":${CI_DOCKER_TAG:-latest}" + +# We use a bespoke config path to isolate us from other runs (although +# we do hog the node's docker state, so it's far from perfect) + CONFIG=/tmp/smoke_test_$$.dmconfig trap 'rm "$CONFIG" || true' EXIT @@ -66,7 +70,7 @@ then echo "### Testing delete on remote..." REMOTE_NAME="`echo $SMOKE_TEST_REMOTE | sed s/@.*$//`" - "$DM" dot delete -f "$REMOTE_NAME"/"$VOL" + "$DM" -c "$CONFIG"dot delete -f "$REMOTE_NAME"/"$VOL" "$DM" -c "$CONFIG" remote switch local "$DM" -c "$CONFIG" remote rm "$REMOTE" From 72568a8d75b82f8898c7fc67b903b1df282c35f8 Mon Sep 17 00:00:00 2001 From: Alaric Snell-Pym Date: Fri, 4 May 2018 17:30:01 +0100 Subject: [PATCH 14/16] FIX: Missed space :-( Testing stuff in CI is tedious. --- scripts/smoke.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/smoke.sh b/scripts/smoke.sh index 016b6d414..4a04484d1 100755 --- a/scripts/smoke.sh +++ b/scripts/smoke.sh @@ -70,7 +70,7 @@ then echo "### Testing delete on remote..." 
REMOTE_NAME="`echo $SMOKE_TEST_REMOTE | sed s/@.*$//`" - "$DM" -c "$CONFIG"dot delete -f "$REMOTE_NAME"/"$VOL" + "$DM" -c "$CONFIG" dot delete -f "$REMOTE_NAME"/"$VOL" "$DM" -c "$CONFIG" remote switch local "$DM" -c "$CONFIG" remote rm "$REMOTE" From 5f10cb4cc35da1093504c2644b7769e9910ae794 Mon Sep 17 00:00:00 2001 From: Luke Marsden Date: Tue, 8 May 2018 15:38:35 +0100 Subject: [PATCH 15/16] dotscience#3 make subdot roots writeable by all, for containers which run as non-root --- cmd/dotmesh-server/pkg/main/docker.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/dotmesh-server/pkg/main/docker.go b/cmd/dotmesh-server/pkg/main/docker.go index 9cfeb6274..e36bc4b09 100644 --- a/cmd/dotmesh-server/pkg/main/docker.go +++ b/cmd/dotmesh-server/pkg/main/docker.go @@ -129,7 +129,7 @@ func newContainerMountSymlink(name VolumeName, filesystemId string, subvolume st // Do we need to create the subvolume directory? if _, err := os.Stat(result); err != nil { if os.IsNotExist(err) { - if err := os.MkdirAll(result, 0755); err != nil { + if err := os.MkdirAll(result, 0777); err != nil { log.Printf("[newContainerMountSymlink] error creating subdot %s: %+v", result, err) return "", err } From 277d71f489229867ba0b34244ed15ff26bf6ecbd Mon Sep 17 00:00:00 2001 From: Priya Samuel Date: Tue, 8 May 2018 15:03:12 +0000 Subject: [PATCH 16/16] NFC: More logging --- tests/acceptance_test.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/acceptance_test.go b/tests/acceptance_test.go index 20685fac4..99f02d2c8 100644 --- a/tests/acceptance_test.go +++ b/tests/acceptance_test.go @@ -935,8 +935,8 @@ func TestTwoNodesSameCluster(t *testing.T) { citools.RunOnNode(t, node1, citools.DockerRun(fsname)+" sh -c 'echo WORLD > /foo/HELLO'") citools.RunOnNode(t, node1, "dm switch "+fsname) citools.RunOnNode(t, node1, "dm commit -m 'First commit'") - ensureCurrentDotIsFullyReplicated(t, node1, fsname) - ensureCurrentDotIsFullyReplicated(t, node2, 
fsname) + ensureDotIsFullyReplicated(t, node1, fsname) + ensureDotIsFullyReplicated(t, node2, fsname) time.Sleep(3 * time.Second) fsId := strings.TrimSpace(citools.OutputFromRunOnNode(t, node1, "dm dot show -H | grep masterBranchId | cut -f 2")) @@ -2232,12 +2232,14 @@ spec: citools.DumpTiming() } -func ensureCurrentDotIsFullyReplicated(t *testing.T, node string, fsname string) { +func ensureDotIsFullyReplicated(t *testing.T, node string, fsname string) { for try := 1; try <= 5; try++ { - st := citools.OutputFromRunOnNode(t, node, fmt.Sprintf("dm dot show %s | grep missing || true", fsname)) - if st == "" { + st := citools.OutputFromRunOnNode(t, node, fmt.Sprintf("dm dot show %s", fsname)) + if !strings.Contains(st, "missing") { + fmt.Print("Replicated") return } else { + fmt.Print("Failed to replicate, sleeping and retrying") time.Sleep(1 * time.Second) } }