/
RaftCluster.java
335 lines (315 loc) · 18.3 KB
/
RaftCluster.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
/*
* Copyright 2016 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
package io.atomix.protocols.raft.cluster;
import io.atomix.cluster.NodeId;
import io.atomix.protocols.raft.RaftServer;
import java.util.Arrays;
import java.util.Collection;
import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;
/**
* Raft server cluster API.
* <p>
* This interface provides the view of the Raft cluster from the perspective of a single server. When a
* {@link RaftServer} is started, the server will form a cluster
* with other servers. Each Raft cluster consists of some set of {@link #members() members}, and each
* {@link RaftMember} represents a single server in the cluster. Users can use the {@code RaftCluster} to react to
* state changes in the underlying Raft algorithm via the various listeners.
* <p>
* <pre>
* {@code
* server.cluster().addJoinListener(member -> {
*   System.out.println(member.id() + " joined the cluster!");
* });
* }
* </pre>
* Membership exposed via this interface is provided from the perspective of the local server and may not
* necessarily be consistent with cluster membership from the perspective of other nodes. The only consistent
* membership list is on the {@link #leader() leader} node.
* <h2>Cluster management</h2>
* Users can use the {@code RaftCluster} to manage the Raft cluster membership. Typically, servers join the
* cluster by calling {@link RaftServer#bootstrap(NodeId...)} or {@link #join(NodeId...)},
* but in the event that a server fails permanently and thus cannot remove itself, other nodes can remove arbitrary servers.
* <p>
* <pre>
* {@code
* server.cluster().addJoinListener(member -> {
*   member.remove().thenRun(() -> System.out.println("Removed " + member.id() + " from the cluster!"));
* });
* }
* </pre>
* When a member is removed from the cluster, the configuration change removing the member will be replicated to all
* the servers in the cluster and persisted to disk. Once a member has been removed, for that member to rejoin the cluster
* it must fully restart and request to rejoin the cluster. For servers configured with a persistent
* {@link io.atomix.protocols.raft.storage.StorageLevel}, cluster configurations are stored on disk.
* <p>
* Additionally, members can be {@link RaftMember#promote() promoted} and {@link RaftMember#demote() demoted} by any
* other member of the cluster. When a member state is changed, a cluster configuration change request is sent
* to the cluster leader where it's logged and replicated through the Raft consensus algorithm. <em>During</em>
* the configuration change, servers' cluster configurations will be updated. Once the configuration change is
* complete, it will be persisted to disk on all servers and is guaranteed not to be lost even in the event of a
* full cluster shutdown (assuming the server uses a persistent {@link io.atomix.protocols.raft.storage.StorageLevel}).
*
* @author <a href="http://github.com/kuujo">Jordan Halterman</a>
*/
public interface RaftCluster {

  /**
   * Returns the member the local server currently believes to be the cluster leader.
   * <p>
   * A {@code null} result means only that the local server has not yet learned of a valid leader for
   * the current {@link #term() term}; it does not prove that no leader exists for that term. Whenever a
   * leader is returned, it is a member known to the local server's configuration.
   *
   * @return The current cluster leader, or {@code null} if no leader is known for the current term.
   */
  RaftMember leader();

  /**
   * Returns the current cluster term.
   * <p>
   * The term is the epoch maintained by the underlying Raft consensus algorithm: a monotonically
   * increasing number that represents a point in logical time. When every server uses a persistent
   * {@link io.atomix.protocols.raft.storage.StorageLevel}, the term stays unique and monotonically
   * increasing even across cluster restarts. Raft also guarantees that at most one
   * {@link #leader() leader} is elected in any given term.
   *
   * @return The current cluster term.
   */
  long term();

  /**
   * Registers a listener that is notified when a leader is elected.
   * <p>
   * The {@code listener} is invoked when a new leader is elected for a term. Since Raft allows only one
   * leader per term, the listener fires at most once per term; a term may also pass without any leader
   * being elected, in which case the listener is not invoked for that term.
   * <pre>
   *   {@code
   *   server.cluster().addLeaderElectionListener(member -> {
   *     System.out.println(member.id() + " elected for term " + server.cluster().term());
   *   });
   *   }
   * </pre>
   * The {@link RaftMember} handed to the listener is the elected leader and is guaranteed to be a member
   * of this {@link RaftCluster}. By the time the listener runs, {@link #term()} already reflects the
   * leader's term, so the term of the new leader can be obtained by reading {@link #term()}.
   *
   * @param listener The listener to be called when a new leader is elected.
   */
  void addLeaderElectionListener(Consumer<RaftMember> listener);

  /**
   * Unregisters a previously added leader election listener.
   *
   * @param listener The leader election listener to remove.
   */
  void removeLeaderElectionListener(Consumer<RaftMember> listener);

  /**
   * Returns the member that represents the local server.
   *
   * @return The local cluster member.
   */
  RaftMember member();

  /**
   * Looks up a member by its identifier.
   * <p>
   * Members are identified by their unique {@link RaftMember#id()}.
   *
   * @param id The member ID.
   * @return The matching member, or {@code null} if no member with the given {@code id} exists.
   */
  RaftMember member(NodeId id);

  /**
   * Returns all members of the cluster as known to the local server.
   * <p>
   * The result reflects the most recent configuration the local server has seen. When membership
   * changes, a new collection is created rather than the previously returned {@link Collection} being
   * modified. Likewise, modifying the returned collection has no effect on cluster membership.
   *
   * @return A collection of all cluster members.
   */
  Collection<RaftMember> members();

  /**
   * Bootstraps the cluster from a varargs list of member identifiers.
   * <p>
   * This is a convenience overload: the identifiers are wrapped with {@link Arrays#asList(Object[])} and
   * passed to {@link #bootstrap(Collection)}, which defines the full bootstrap contract. In summary, a new
   * cluster is formed from the provided configuration (an empty configuration yields a single-node
   * cluster), only {@link RaftMember.Type#ACTIVE} members may be part of it, and every server in the
   * configuration must be started with exactly the same set of members to avoid split brain.
   *
   * @param cluster The bootstrap cluster configuration.
   * @return A completable future to be completed once the cluster has been bootstrapped.
   */
  default CompletableFuture<Void> bootstrap(NodeId... cluster) {
    final Collection<NodeId> configuration = Arrays.asList(cluster);
    return bootstrap(configuration);
  }

  /**
   * Bootstraps the cluster.
   * <p>
   * Bootstrapping forms a new cluster with the provided configuration. The initial nodes of a cluster
   * must always be bootstrapped; this is required to prevent split brain. If the provided configuration
   * is empty, the local server forms a single-node cluster.
   * <p>
   * A bootstrap configuration may contain only {@link RaftMember.Type#ACTIVE} members. A local server
   * that is not initialized as an active member cannot be part of the bootstrap configuration.
   * <p>
   * On bootstrap the local server transitions into the active state and starts participating in the
   * Raft consensus algorithm. No leader exists when the cluster is first bootstrapped; the bootstrapped
   * members elect one amongst themselves. If they cannot reach a quorum to elect a leader, bootstrap
   * continues until it succeeds. After the cluster has been bootstrapped, further members may be
   * {@link #join(NodeId...) joined} to it.
   * <p>
   * It is critical that all servers in a bootstrap configuration are started with the exact same set of
   * members. Bootstrapping servers with differing configurations may result in split brain.
   * <p>
   * The returned {@link CompletableFuture} completes once the cluster has been bootstrapped, a leader
   * has been elected, and the leader has been notified of the local server's client configurations.
   *
   * @param cluster The bootstrap cluster configuration.
   * @return A completable future to be completed once the cluster has been bootstrapped.
   */
  CompletableFuture<Void> bootstrap(Collection<NodeId> cluster);

  /**
   * Joins the cluster using a varargs list of member identifiers.
   * <p>
   * This is a convenience overload: the identifiers are wrapped with {@link Arrays#asList(Object[])} and
   * passed to {@link #join(Collection)}, which defines the full join contract. In summary, the local
   * server contacts the already bootstrapped cluster through the provided members and asks to be added
   * to its configuration; the provided list must be non-empty and contain at least one active member,
   * and the attempt is retried until it succeeds.
   *
   * @param cluster A list of cluster member addresses to join.
   * @return A completable future to be completed once the local server has joined the cluster.
   */
  default CompletableFuture<Void> join(NodeId... cluster) {
    final Collection<NodeId> configuration = Arrays.asList(cluster);
    return join(configuration);
  }

  /**
   * Joins the cluster.
   * <p>
   * Joining adds the local server to an existing cluster that has already been bootstrapped. The
   * provided configuration is used to connect to that cluster and submit a join request; the join is
   * complete once the server has been added to the existing cluster's configuration.
   * <p>
   * A server of any {@link RaftMember.Type type} may join. The provided list of bootstrapped members
   * must be non-empty and must include at least one active member of the cluster. If none of the listed
   * members is reachable, the server keeps attempting to join until it succeeds. If the provided
   * configuration is empty, the returned {@link CompletableFuture} is completed exceptionally.
   * <p>
   * Once joined, the local server transitions into the initial state defined by its configured
   * {@link RaftMember.Type} and immediately begins participating in Raft and asynchronous replication
   * according to its configuration.
   * <p>
   * Note that the provided configuration is only used the first time the server attempts to join.
   * If the server later crashes and is restarted by {@code join}ing again, the last known configuration
   * is used instead, assuming the server is configured with persistent storage. The server's
   * configuration and log are reset only when it leaves the cluster.
   * <p>
   * To preserve safety, leaders do not allow concurrent configuration changes. If another configuration
   * change (a server joining or leaving, or a member being {@link RaftMember#promote() promoted} or
   * {@link RaftMember#demote() demoted}) is under way, the local server retries the join until it
   * succeeds. The join is likewise retried until successful if the server fails to reach the leader.
   *
   * @param cluster A collection of cluster member addresses to join.
   * @return A completable future to be completed once the local server has joined the cluster.
   */
  CompletableFuture<Void> join(Collection<NodeId> cluster);

  /**
   * Leaves the cluster.
   * <p>
   * Calling this method causes the local {@link RaftServer} to leave the cluster.
   * <em>This method is for advanced usage only.</em> Users should normally call
   * {@link RaftServer#leave()} instead, which leaves the cluster and closes the server so that all
   * associated resources are properly released.
   * <p>
   * To leave, the server submits a {@link io.atomix.protocols.raft.protocol.LeaveRequest} to the cluster
   * leader. The leader replicates and commits the configuration change that removes the leaving server
   * and notifies each member of the departure.
   * <p>
   * To preserve safety, leaders do not allow concurrent configuration changes. If another configuration
   * change (a server joining or leaving, or a member being {@link RaftMember#promote() promoted} or
   * {@link RaftMember#demote() demoted}) is under way, the local server keeps retrying until it has
   * successfully left the cluster.
   *
   * @return A completable future to be completed once the local server has left the cluster.
   */
  CompletableFuture<Void> leave();

  /**
   * Registers a listener that is notified when a member joins the cluster.
   * <p>
   * The {@code listener} is invoked whenever a new {@link RaftMember} joins. Membership changes are
   * sequentially consistent: every server observes joins in the same order, but not necessarily at the
   * same point in time. Never assume that all servers have seen a join just because one server has.
   *
   * @param listener The listener to be called when a member joins the cluster.
   */
  void addJoinListener(Consumer<RaftMember> listener);

  /**
   * Unregisters a previously added join listener.
   *
   * @param listener The listener to remove from the cluster.
   */
  void removeJoinListener(Consumer<RaftMember> listener);

  /**
   * Registers a listener that is notified when a member leaves the cluster.
   * <p>
   * The {@code listener} is invoked whenever an existing {@link RaftMember} leaves. Membership changes
   * are sequentially consistent: every server observes departures in the same order, but not necessarily
   * at the same point in time. Never assume that all servers have seen a member leave just because one
   * server has.
   *
   * @param listener The listener to be called when a member leaves the cluster.
   */
  void addLeaveListener(Consumer<RaftMember> listener);

  /**
   * Unregisters a previously added leave listener.
   *
   * @param listener The listener to remove from the cluster.
   */
  void removeLeaveListener(Consumer<RaftMember> listener);
}