-
Notifications
You must be signed in to change notification settings - Fork 63
/
clustersplit.go
130 lines (116 loc) · 3.66 KB
/
clustersplit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package sanitycheck
import (
"fmt"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/errors"
"github.com/amadeusitgroup/redis-operator/pkg/config"
"github.com/amadeusitgroup/redis-operator/pkg/redis"
)
// FixClusterSplit detects a cluster split and, unless dryRun is set, repairs it.
//
// The node views in infos are grouped into independent clusters. When more
// than one cluster is found the function reports true: in dry-run mode it
// returns immediately without touching anything, otherwise it also returns
// the error (if any) produced by reassignClusters. When at most one cluster
// is found it returns false and a nil error.
func FixClusterSplit(admin redis.AdminInterface, config *config.Redis, infos *redis.ClusterInfos, dryRun bool) (bool, error) {
	found := buildClustersLists(infos)

	// Zero or one cluster: nothing is split, nothing to repair.
	if len(found) <= 1 {
		glog.V(3).Info("[SanityChecks] No split cluster detected")
		return false, nil
	}

	// A split exists; in dry-run mode we only report it.
	if dryRun {
		return true, nil
	}

	err := reassignClusters(admin, config, found)
	return true, err
}
// cluster is the list of node addresses (the strings returned by IPPort)
// that make up one cluster as seen from a node.
type cluster []string
// reassignClusters merges several split clusters back into a single one.
//
// The cluster elected by splitMainCluster is kept and admin's connections are
// replaced with its nodes. Every node of every other cluster is flushed and
// hard-reset through a dedicated admin, then attached to the main cluster via
// admin. Failures on individual nodes are logged and collected; processing
// continues with the remaining nodes.
//
// It returns an error when no main cluster can be elected, otherwise the
// aggregate of all per-node errors (nil when there were none).
func reassignClusters(admin redis.AdminInterface, config *config.Redis, clusters []cluster) error {
	glog.Error("[SanityChecks] Cluster split detected, the Redis manager will recover from the issue, but data may be lost")

	// Exactly one cluster may survive the repair.
	mainCluster, badClusters := splitMainCluster(clusters)
	if len(mainCluster) == 0 {
		glog.Error("[SanityChecks] Impossible to fix cluster split, cannot elect main cluster")
		return fmt.Errorf("Impossible to fix cluster split, cannot elect main cluster")
	}
	glog.Infof("[SanityChecks] Cluster '%s' is elected as main cluster", mainCluster)

	// Point the shared admin at the surviving cluster.
	admin.Connections().ReplaceAll(mainCluster)

	var failures []error
	for _, bad := range badClusters {
		glog.Warningf("[SanityChecks] All keys stored in redis cluster '%s' will be lost", bad)

		// A dedicated admin is needed to talk to the nodes of the discarded cluster.
		opts := &redis.AdminOptions{
			ConnectionTimeout:  time.Duration(config.DialTimeout) * time.Millisecond,
			RenameCommandsFile: config.GetRenameCommandsFile(),
		}
		badAdmin := redis.NewAdmin(bad, opts)

		for _, addr := range bad {
			// Wipe the node first, then hand it over to the main cluster.
			// A failed flush does not prevent the attach attempt.
			flushErr := badAdmin.FlushAndReset(addr, redis.ResetHard)
			if flushErr != nil {
				glog.Errorf("unable to flush the node: %s, err:%v", addr, flushErr)
				failures = append(failures, flushErr)
			}
			attachErr := admin.AttachNodeToCluster(addr)
			if attachErr != nil {
				glog.Errorf("unable to attach the node: %s, err:%v", addr, attachErr)
				failures = append(failures, attachErr)
			}
		}

		badAdmin.Close()
	}

	return errors.NewAggregate(failures)
}
// splitMainCluster elects the cluster to keep and separates it from the rest.
//
// The biggest cluster is elected; when several clusters share the biggest
// size the first of them wins. It returns the elected cluster and a newly
// allocated slice holding all the other clusters in their original order.
// The input slice is never modified.
//
// Special cases: with no clusters at all, an empty cluster and an empty list
// are returned; when every cluster is empty, the first (empty) cluster is
// returned together with an empty list.
func splitMainCluster(clusters []cluster) (cluster, []cluster) {
	if len(clusters) == 0 {
		return cluster{}, []cluster{}
	}

	// The strict comparison keeps the first cluster among equally sized ones.
	mainIdx := -1
	maxSize := 0
	for i, c := range clusters {
		if len(c) > maxSize {
			maxSize = len(c)
			mainIdx = i
		}
	}

	// Every cluster is empty: nothing can be elected.
	if mainIdx == -1 {
		return clusters[0], []cluster{}
	}

	// Copy the leftovers into a fresh slice. Appending onto
	// clusters[:mainIdx] would shift elements inside the caller's backing
	// array and silently corrupt the slice that was passed in.
	others := make([]cluster, 0, len(clusters)-1)
	others = append(others, clusters[:mainIdx]...)
	others = append(others, clusters[mainIdx+1:]...)
	return clusters[mainIdx], others
}
// buildClustersLists builds the list of independent clusters described by infos.
//
// For each entry of infos.Infos that carries a node, the node's view is the
// addresses of its friends followed by its own address, leaving out every
// node whose FailStatus is not empty. A view is recorded as a new cluster
// only when none of its addresses already belongs to a recorded cluster;
// views may partially overlap when the nodes have an inconsistent picture of
// the cluster, and such overlapping views are dropped.
//
// NOTE(review): a view in which every node is failing is empty, overlaps
// nothing and is therefore recorded as an (empty) cluster of its own —
// confirm this is intended.
func buildClustersLists(infos *redis.ClusterInfos) []cluster {
	clusters := []cluster{}
	for _, nodeinfos := range infos.Infos {
		if nodeinfos == nil || nodeinfos.Node == nil {
			continue
		}

		// Cap the friends slice at its own length so append must allocate a
		// new backing array: appending directly to nodeinfos.Friends could
		// write the node into spare capacity shared with other slices.
		friends := nodeinfos.Friends
		view := append(friends[:len(friends):len(friends)], nodeinfos.Node)

		// Build the list of addresses of the healthy nodes of this view.
		var c cluster
		for _, node := range view {
			if len(node.FailStatus) == 0 {
				c = append(c, node.IPPort())
			}
		}

		// Check whether this view overlaps with an already known cluster.
		overlap := false
		for _, addr := range c {
			if findInCluster(addr, clusters) {
				overlap = true
				break
			}
		}

		// Only a view sharing no address with known clusters is a new cluster.
		if !overlap {
			clusters = append(clusters, c)
		}
	}
	return clusters
}
// findInCluster reports whether addr is a member of at least one of the
// given clusters.
func findInCluster(addr string, clusters []cluster) bool {
	for i := range clusters {
		members := clusters[i]
		for j := 0; j < len(members); j++ {
			if members[j] == addr {
				return true
			}
		}
	}
	return false
}