/
failure_scenarios.go
179 lines (144 loc) · 6.22 KB
/
failure_scenarios.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package a1upgrade
import (
"fmt"
"os"
"strconv"
)
// initialCondition identifies one of the idempotence scenarios we wish to
// demonstrate in this prototype. The scenario for a run is selected by setting
// SCENARIO=number in the environment; use condition() to check for it.
type initialCondition int
const (
// defaultCondition means no upgrade steps have been completed yet.
defaultCondition initialCondition = 0 // nolint: deadcode,varcheck
// configExists is the case where we have a user-provided config file or we
// have one that we generated previously.
configExists initialCondition = 100 // nolint: deadcode,varcheck
// a1DataExtracteda1Up is the case where extracted a1 data is present on disk,
// but a1 is fully up.
// TODO: maybe we make sure we rename the a1 export file in our failure cases,
// then we don't need to worry about this case any more?
a1DataExtracteda1Up initialCondition = 200 // nolint: deadcode,varcheck
// a1DataExtracteda1Down is the case where extracted a1 data is present on
// disk and a1 is down.
a1DataExtracteda1Down initialCondition = 201 // nolint: deadcode,varcheck
)
// requestedScenario is the initial condition selected through the SCENARIO
// environment variable. It is assigned in init() and stays at its zero value
// (equal to defaultCondition) when SCENARIO is not set.
var requestedScenario initialCondition
// FailureScenario represents a set of failure conditions that we want to be
// able to test and demo. To invoke a given failure condition in an e2e test,
// set FAILURE=number in the environment of the command (for example by
// prefixing the command line with it). To check whether the user has requested
// a certain failure scenario, use the Failure() function.
type FailureScenario int
const (
// NoFailure indicates that the user hasn't asked to exercise a specific
// failure case.
NoFailure FailureScenario = 0
// PreflightNoA1 is the case that we're running upgrade on a host that
// doesn't have automate v1 installed.
PreflightNoA1 FailureScenario = 100
// PreflightA1TooOld is the case that the installed automate v1 doesn't meet
// our minimum version requirement for upgrading.
PreflightA1TooOld FailureScenario = 101
// PreflightA1Down is the case that automate v1 is not running. We need a1 to
// be up so we can export data from postgres.
PreflightA1Down FailureScenario = 102
// PreflightA1Unhealthy is the case that a1's status check is failing. We
// just don't want to deal with all the ways that could possibly break
// upgrades.
PreflightA1Unhealthy FailureScenario = 103
// PreflightPostgresTooBig makes the database checker return 1TB as the size
// of every postgres database, which will make the preflight fail unless you
// have a few TB of disk.
PreflightPostgresTooBig FailureScenario = 104
// ConfigConflictFIPS is the case that a1 is configured for FIPS, which we
// don't support currently.
ConfigConflictFIPS FailureScenario = 200
// ConfigConflictNoBackup is the case that a1 doesn't have backups
// configured. We want to very strongly encourage users to back up before
// upgrading in case the upgrade doesn't work.
ConfigConflictNoBackup FailureScenario = 201
// ConfigConflictSAML is the case that a1 is configured with SAML, which we
// don't migrate currently.
ConfigConflictSAML FailureScenario = 202
// MaintModeDataCollectorNotDown is the case that we attempted to put a1 in
// maintenance mode but the data collector URL isn't serving 503s.
MaintModeDataCollectorNotDown FailureScenario = 300
// MaintModeDataCollectorRetryTest indicates that the test harness should
// fail to return a 503 for the data collector URL a few times before
// returning a 503. (Its value, 302, is intentionally kept as-is even though
// it is declared before 301.)
MaintModeDataCollectorRetryTest FailureScenario = 302
// MaintModeQueuedDataNotProcessed is the case that we put a1 in maintenance
// mode but the data collector queue didn't fully drain within some timeout
// period.
MaintModeQueuedDataNotProcessed FailureScenario = 301
// MaintModeQueuedDataRetryTest indicates that the test harness should return
// a non-zero queue depth from the stub rabbitmq management API a few times
// before returning a zero queue depth.
MaintModeQueuedDataRetryTest FailureScenario = 303
// MaintModeRequiredRecipeNotDown is the case that we attempted to put A1 in
// maintenance mode but the Chef server isn't serving 503s.
MaintModeRequiredRecipeNotDown FailureScenario = 304
// MaintModeRequiredRecipeRetryTest will fail to return a 503 for the
// required_recipe URL a few times before returning a 503.
MaintModeRequiredRecipeRetryTest FailureScenario = 305
// A1BackupFail is the case that the backup of a1 failed.
A1BackupFail FailureScenario = 400
// ExtractA1DataFail is the case that we failed for some reason to dump a1's
// postgres or copy some of the other data.
ExtractA1DataFail FailureScenario = 500
// A1ShutdownFail is the case that `automate-ctl stop` errored out for some
// reason.
A1ShutdownFail FailureScenario = 600
// ChefServerShutdownFail is the case that `chef-server-ctl stop` errored out
// for some reason.
ChefServerShutdownFail FailureScenario = 601
// ChefServerShowSecretFail is the case that `chef-server-ctl
// show-secret` errored out for some reason.
ChefServerShowSecretFail FailureScenario = 602
// DataImportFailPostgres is the case that we couldn't load a1's pg data into
// a2's pg.
DataImportFailPostgres FailureScenario = 700
// DataImportFailElastic is the case that we couldn't load a1's es data into
// a2's es.
DataImportFailElastic FailureScenario = 701
// Stage1MigrationFail is the case that domain services failed to ingest the
// staged a1 data for some reason (or otherwise failed to start).
Stage1MigrationFail FailureScenario = 800
)
// requestedFailure is the failure scenario selected through the FAILURE
// environment variable. It is assigned in init() and stays at its zero value
// (equal to NoFailure) when FAILURE is unset or empty.
var requestedFailure FailureScenario
// FailureInt is the raw integer parsed from the FAILURE environment variable
// in init(); it is 0 when FAILURE is unset or empty.
var FailureInt int
// init reads the SCENARIO and FAILURE environment variables once at package
// load and records the requested initial condition and failure scenario in
// requestedScenario, requestedFailure and FailureInt.
//
// Both variables must hold a base-10 integer when set to a non-empty value;
// anything else panics so that a mistyped test setup fails loudly instead of
// silently running the default path. An unset or empty variable leaves the
// corresponding package-level value at its zero value.
func init() {
	if scenarioValue, ok := os.LookupEnv("SCENARIO"); ok && scenarioValue != "" {
		// strconv.Atoi rejects trailing garbage such as "100abc", which
		// fmt.Sscanf with "%d" would quietly accept as 100. It also keeps
		// SCENARIO parsing consistent with FAILURE below.
		scenarioInt, err := strconv.Atoi(scenarioValue)
		if err != nil {
			msg := fmt.Sprintf("failed to parse SCENARIO env var to int (value: %s): %v\n", scenarioValue, err)
			panic(msg)
		}
		requestedScenario = initialCondition(scenarioInt)
	}

	if failureValue, ok := os.LookupEnv("FAILURE"); ok && failureValue != "" {
		failureInt, err := strconv.Atoi(failureValue)
		if err != nil {
			msg := fmt.Sprintf("FAILURE environment variable '%s' must be an integer", failureValue)
			panic(msg)
		}
		// Keep the raw integer available alongside the typed scenario.
		FailureInt = failureInt
		requestedFailure = FailureScenario(failureInt)
	}
}
// condition reports whether maybeCondition is the initial condition that was
// requested for this run, as recorded in requestedScenario.
func condition(maybeCondition initialCondition) bool { // nolint: deadcode
	return requestedScenario == maybeCondition
}
// Failure reports whether maybeFailure is the FailureScenario the user asked
// to exercise. Typical use:
//
//	if Failure(A1ShutdownFail) {
//		return aCannedError()
//	}
//
//	// normal program execution...
func Failure(maybeFailure FailureScenario) bool {
	return requestedFailure == maybeFailure
}