forked from vitessio/vitess
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reparent.go
134 lines (120 loc) · 4.18 KB
/
reparent.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Copyright 2012, Google Inc. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mysqlctl
import (
"fmt"
"time"
log "github.com/golang/glog"
"github.com/youtube/vitess/go/vt/mysqlctl/proto"
)
// DemoteMaster gracefully demotes a master that is still alive: it makes
// the server read-only, flushes the writes and returns the resulting
// master position.
//
// An error is returned if the server cannot be switched to read-only, if
// the flush commands fail, or if the master status cannot be read. In
// particular, no position is returned when read-only could not be set,
// since writes might still be accepted and the position would not be a
// reliable demotion point.
func (mysqld *Mysqld) DemoteMaster() (*proto.ReplicationPosition, error) {
	// Stop accepting writes first; everything below relies on it.
	if err := mysqld.SetReadOnly(true); err != nil {
		return nil, err
	}

	// Take and immediately release a read lock on all tables so that
	// pending writes are flushed before the position is read.
	cmds := []string{
		"FLUSH TABLES WITH READ LOCK",
		"UNLOCK TABLES",
	}
	if err := mysqld.executeSuperQueryList(cmds); err != nil {
		return nil, err
	}
	return mysqld.MasterStatus()
}
// PromoteSlave stops replication on this server and turns it into a master.
//
// It records the position reached before promotion, runs the flavor-specific
// promotion commands, inserts a marker row into _vt.replication_log, checks
// that the binlog position moved as a result, and then records the reparent
// in _vt.reparent_log.
//
// setReadWrite: set the new master in read-write mode.
// hookExtraEnv: handed to StopSlave unchanged.
//
// replicationState: info slaves need to reparent themselves
// waitPosition: slaves can wait for this position when restarting replication
// timePromoted: this timestamp (unix nanoseconds) is inserted into
// _vt.replication_log to verify the replication config
//
// On failure err is set and the remaining results hold whatever had been
// computed before the failing step.
func (mysqld *Mysqld) PromoteSlave(setReadWrite bool, hookExtraEnv map[string]string) (replicationState *proto.ReplicationState, waitPosition *proto.ReplicationPosition, timePromoted int64, err error) {
	if err = mysqld.StopSlave(hookExtraEnv); err != nil {
		return
	}

	// If we are forced, this server may not be a slave at all; fall back
	// to its status as a master.
	prevPos, err := mysqld.SlaveStatus()
	if err == ErrNotSlave {
		prevPos, err = mysqld.MasterStatus()
	}
	if err != nil {
		return
	}

	if err = mysqld.executeSuperQueryList(mysqld.flavor.PromoteSlaveCommands()); err != nil {
		return
	}

	// Position of the freshly promoted master, before the marker row.
	masterPos, err := mysqld.MasterStatus()
	if err != nil {
		return
	}

	replicationState, err = proto.NewReplicationState(mysqld.IpAddr())
	if err != nil {
		return
	}
	replicationState.ReplicationPosition = *masterPos

	// Values recorded in _vt.reparent_log further down.
	lastPos := prevPos.MapKey()
	newAddr := replicationState.MasterAddr()
	newPos := replicationState.ReplicationPosition.MapKey()

	// Write a row to verify that replication is functioning.
	timePromoted = time.Now().UnixNano()
	markerInsert := fmt.Sprintf("INSERT INTO _vt.replication_log (time_created_ns, note) VALUES (%v, 'reparent check')", timePromoted)
	if err = mysqld.executeSuperQueryList([]string{markerInsert}); err != nil {
		return
	}

	// This is the wait-point for checking replication.
	waitPosition, err = mysqld.MasterStatus()
	if err != nil {
		return
	}

	binlogMoved := waitPosition.MasterLogFile != masterPos.MasterLogFile ||
		waitPosition.MasterLogPosition != masterPos.MasterLogPosition
	if !binlogMoved {
		// A row was inserted, yet the binlog position is unchanged.
		// This is a serious problem: never promote a master like that.
		err = fmt.Errorf("cannot promote slave to master, non-functional binlogs")
		return
	}

	reparentInsert := fmt.Sprintf("INSERT INTO _vt.reparent_log (time_created_ns, last_position, new_addr, new_position, wait_position) VALUES (%v, '%v', '%v', '%v', '%v')", timePromoted, lastPos, newAddr, newPos, waitPosition.MapKey())
	if err = mysqld.executeSuperQueryList([]string{reparentInsert}); err != nil {
		return
	}

	if setReadWrite {
		err = mysqld.SetReadOnly(false)
	}
	return
}
// RestartSlave points this server at the master described by
// replicationState and verifies that replication works.
//
// It runs the commands produced by StartReplicationCommands, waits for the
// slave to start (bounded by SlaveStartDeadline), waits until waitPosition
// is reached, and finally calls CheckReplication with timeCheck. The first
// failing step aborts the sequence and its error is returned.
func (mysqld *Mysqld) RestartSlave(replicationState *proto.ReplicationState, waitPosition *proto.ReplicationPosition, timeCheck int64) error {
	log.Infof("Restart Slave")

	startCmds, err := StartReplicationCommands(mysqld, replicationState)
	if err != nil {
		return err
	}

	// The remaining work is a strict sequence; run it in order and stop
	// at the first error.
	steps := []func() error{
		func() error { return mysqld.executeSuperQueryList(startCmds) },
		func() error { return mysqld.WaitForSlaveStart(SlaveStartDeadline) },
		// NOTE(review): the 0 is presumably a "no timeout" value - confirm
		// against WaitMasterPos.
		func() error { return mysqld.WaitMasterPos(waitPosition, 0) },
		func() error { return mysqld.CheckReplication(timeCheck) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return err
		}
	}
	return nil
}
// CheckReplication looks for the magic row inserted under controlled
// reparenting.
//
// timeCheck is the time_created_ns value of the row to look up in
// _vt.replication_log. It returns nil when exactly one matching row is
// found, the query error if the lookup fails, and an error reporting the
// row count otherwise.
func (mysqld *Mysqld) CheckReplication(timeCheck int64) error {
	log.Infof("Check replication restarted")

	query := fmt.Sprintf("SELECT * FROM _vt.replication_log WHERE time_created_ns = %v",
		timeCheck)
	result, err := mysqld.fetchSuperQuery(query)
	if err != nil {
		return err
	}

	if found := len(result.Rows); found != 1 {
		return fmt.Errorf("replication failed - unexpected row count %v", found)
	}
	return nil
}