forked from cockroachdb/cockroach
/
start.go
243 lines (217 loc) · 7.72 KB
/
start.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
// Copyright 2015 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License. See the AUTHORS file
// for names of contributors.
//
// Author: Andrew Bonventre (andybons@gmail.com)
// Author: Spencer Kimball (spencer.kimball@gmail.com)
package cli
import (
"fmt"
"os"
"os/signal"
"syscall"
"time"
"github.com/cockroachdb/cockroach/client"
"github.com/cockroachdb/cockroach/security"
"github.com/cockroachdb/cockroach/server"
"github.com/cockroachdb/cockroach/storage/engine"
"github.com/cockroachdb/cockroach/util"
"github.com/cockroachdb/cockroach/util/log"
"github.com/cockroachdb/cockroach/util/stop"
"github.com/cockroachdb/cockroach/util/uuid"
"github.com/spf13/cobra"
)
// Context is the CLI Context used for the server. It is package-level
// state shared by every command in this file; each command's Run
// function calls Context.Init with its own command name before use.
var Context = server.NewContext()
// initCmd command initializes a new Cockroach cluster.
// The actual work happens in runInit below.
var initCmd = &cobra.Command{
	Use:   "init --stores=...",
	Short: "init new Cockroach cluster and start server",
	Long: `
Initialize a new Cockroach cluster using the --stores flag to specify one or
more storage locations. The first of these storage locations is used to
bootstrap the first replica of the first range. If any of the storage locations
are already part of a pre-existing cluster, the bootstrap will fail.
`,
	Example: ` cockroach init --stores=ssd=/mnt/ssd1,ssd=/mnt/ssd2`,
	Run:     runInit,
}
// runInit initializes the engine based on the first
// store. The bootstrap engine may not be an in-memory type.
//
// On failure the error is logged and the function returns without
// exiting the process; on success the newly generated cluster ID is
// logged.
func runInit(cmd *cobra.Command, args []string) {
	// Default user for servers.
	Context.User = security.NodeUser
	// First initialize the Context as it is used in other places.
	if err := Context.Init("init"); err != nil {
		log.Errorf("failed to initialize context: %s", err)
		return
	}
	// Generate a new UUID for cluster ID and bootstrap the cluster.
	clusterID := uuid.NewUUID4().String()
	stopper := stop.NewStopper()
	// Stop on every exit path; the original only stopped on success,
	// leaking the stopper when bootstrap failed.
	defer stopper.Stop()
	if _, err := server.BootstrapCluster(clusterID, Context.Engines, stopper); err != nil {
		log.Errorf("unable to bootstrap cluster: %s", err)
		return
	}
	log.Infof("cockroach cluster %s has been initialized", clusterID)
}
// startCmd command starts nodes by joining the gossip network.
// The actual work happens in runStart below.
var startCmd = &cobra.Command{
	Use:   "start",
	Short: "start a node by joining the gossip network",
	Long: `
Start a Cockroach node by joining the gossip network and exporting key ranges
stored on physical device(s). The gossip network is joined by contacting one or
more well-known hosts specified by the --gossip flag. Every node should be run
with the same list of bootstrap hosts to guarantee a connected network. An
alternate approach is to use a single host for --gossip and round-robin DNS.
Each node exports data from one or more physical devices. These devices are
specified via the --stores flag. This is a comma-separated list of paths to
storage directories or for in-memory stores, the number of bytes. Although the
paths should be specified to correspond uniquely to physical devices, this
requirement isn't strictly enforced. See the --stores flag help description for
additional details.`,
	Example: ` cockroach start --certs=<dir> --gossip=host1:port1[,...] --stores=ssd=/mnt/ssd1,...`,
	Run:     runStart,
}
// runStart starts the cockroach node using --stores as the list of
// storage devices ("stores") on this machine and --gossip as the list
// of "well-known" hosts used to join this node to the cockroach
// cluster via the gossip network.
//
// Once the server is running, this blocks until the stopper is
// stopped externally (for example, via the quit endpoint) or a
// shutdown signal arrives. The first signal initiates a graceful
// drain; a second signal or a one-minute timeout forces a hard
// shutdown.
func runStart(cmd *cobra.Command, args []string) {
	info := util.GetBuildInfo()
	log.Infof("build Vers: %s", info.Vers)
	log.Infof("build Tag: %s", info.Tag)
	log.Infof("build Time: %s", info.Time)
	log.Infof("build Deps: %s", info.Deps)
	// Default user for servers.
	Context.User = security.NodeUser
	// First initialize the Context as it is used in other places.
	if err := Context.Init("start"); err != nil {
		log.Errorf("failed to initialize context: %s", err)
		return
	}
	log.Info("starting cockroach cluster")
	stopper := stop.NewStopper()
	s, err := server.NewServer(Context, stopper)
	if err != nil {
		log.Errorf("failed to start Cockroach server: %s", err)
		return
	}
	if err := s.Start(false); err != nil {
		log.Errorf("cockroach server exited with error: %s", err)
		return
	}
	signalCh := make(chan os.Signal, 1)
	// NOTE: the original also registered os.Kill here, but SIGKILL can
	// never be caught or ignored, so that registration was a no-op and
	// has been dropped.
	signal.Notify(signalCh, os.Interrupt)
	// TODO(spencer): move this behind a build tag.
	signal.Notify(signalCh, syscall.SIGTERM)
	// Block until one of the signals above is received or the stopper
	// is stopped externally (for example, via the quit endpoint).
	select {
	case <-stopper.ShouldStop():
	case <-signalCh:
		go s.Stop()
	}
	log.Info("initiating graceful shutdown of server")
	// While draining, periodically log how many tasks remain so the
	// operator can see progress. The goroutine exits when the stopper
	// finishes stopping.
	go func() {
		ticker := time.NewTicker(5 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if log.V(1) {
					log.Infof("running tasks:\n%s", stopper.RunningTasks())
				}
				log.Infof("%d running tasks", stopper.NumTasks())
			case <-stopper.ShouldStop():
				return
			}
		}
	}()
	// Wait for the drain to complete, but give up on a second signal
	// or after one minute.
	select {
	case <-signalCh:
		log.Warningf("second signal received, initiating hard shutdown")
	case <-time.After(time.Minute):
		log.Warningf("time limit reached, initiating hard shutdown")
	case <-stopper.IsStopped():
		log.Infof("server drained and shutdown completed")
	}
	log.Flush()
}
// exterminateCmd command shuts down the node server and
// destroys all data held by the node.
// The actual work happens in runExterminate below.
var exterminateCmd = &cobra.Command{
	Use:   "exterminate",
	Short: "destroy all data held by the node",
	Long: `
First shuts down the system and then destroys all data held by the
node, cycling through each store specified by the --stores flag.
`,
	Run: runExterminate,
}
// runExterminate destroys the data held in the specified stores.
// It first asks the server (best effort) to shut down via the admin
// quit endpoint, then destroys the on-disk data of every RocksDB
// store listed in the Context.
func runExterminate(cmd *cobra.Command, args []string) {
	if err := Context.Init("exterminate"); err != nil {
		log.Errorf("failed to initialize context: %s", err)
		return
	}
	// First attempt to shutdown the server. Note that an error of EOF just
	// means the HTTP server shutdown before the request to quit returned.
	adminClient := client.NewAdminClient(&Context.Context, Context.Addr, client.Quit)
	if resp, err := adminClient.Get(); err != nil {
		log.Infof("shutdown node %s: %s", Context.Addr, err)
	} else {
		log.Infof("shutdown node in anticipation of data extermination: %s", resp)
	}
	// Exterminate all data held in specified stores.
	for _, eng := range Context.Engines {
		rocksdb, isRocksDB := eng.(*engine.RocksDB)
		if !isRocksDB {
			// Only on-disk (RocksDB) stores carry data to destroy.
			continue
		}
		log.Infof("exterminating data from store %s", eng)
		if err := rocksdb.Destroy(); err != nil {
			log.Errorf("unable to destroy store %s: %s", eng, err)
			osExit(1)
		}
	}
	log.Infof("exterminated all data from stores %s", Context.Engines)
}
// quitCmd command shuts down the node server.
// The actual work happens in runQuit below.
var quitCmd = &cobra.Command{
	Use: "quit",
	// The original Short ended in "\n", which garbles the aligned
	// command listing printed by `cockroach help`; the stray newline
	// has been removed.
	Short: "drain and shutdown node",
	Long: `
Shutdown the server. The first stage is drain, where any new requests
will be ignored by the server. When all extant requests have been
completed, the server exits.
`,
	Run: runQuit,
}
// runQuit accesses the quit shutdown path.
// It issues a request to the admin quit endpoint, which drains the
// node and shuts it down, reporting the outcome on stdout.
func runQuit(cmd *cobra.Command, args []string) {
	adminClient := client.NewAdminClient(&Context.Context, Context.Addr, client.Quit)
	switch resp, err := adminClient.Get(); {
	case err != nil:
		fmt.Printf("shutdown node error: %s\n", err)
		osExit(1)
	default:
		fmt.Printf("node drained and shutdown: %s\n", resp)
	}
}