forked from voldemort/voldemort
/
AdminClient.java
3189 lines (2890 loc) · 168 KB
/
AdminClient.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright 2008-2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package voldemort.client.protocol.admin;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.Socket;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import voldemort.VoldemortException;
import voldemort.client.ClientConfig;
import voldemort.client.SocketStoreClientFactory;
import voldemort.client.SystemStore;
import voldemort.client.protocol.RequestFormatType;
import voldemort.client.protocol.VoldemortFilter;
import voldemort.client.protocol.pb.ProtoUtils;
import voldemort.client.protocol.pb.VAdminProto;
import voldemort.client.protocol.pb.VAdminProto.RebalancePartitionInfoMap;
import voldemort.client.protocol.pb.VProto;
import voldemort.client.protocol.pb.VProto.RequestType;
import voldemort.client.rebalance.RebalancePartitionsInfo;
import voldemort.cluster.Cluster;
import voldemort.cluster.Node;
import voldemort.routing.RoutingStrategy;
import voldemort.routing.RoutingStrategyFactory;
import voldemort.server.RequestRoutingType;
import voldemort.server.protocol.admin.AsyncOperationStatus;
import voldemort.server.rebalance.RebalancerState;
import voldemort.server.rebalance.VoldemortRebalancingException;
import voldemort.store.ErrorCodeMapper;
import voldemort.store.Store;
import voldemort.store.StoreDefinition;
import voldemort.store.StoreUtils;
import voldemort.store.metadata.MetadataStore;
import voldemort.store.metadata.MetadataStore.VoldemortState;
import voldemort.store.mysql.MysqlStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.ReadOnlyUtils;
import voldemort.store.routed.NodeValue;
import voldemort.store.slop.Slop;
import voldemort.store.slop.Slop.Operation;
import voldemort.store.socket.SocketDestination;
import voldemort.store.socket.SocketStore;
import voldemort.store.socket.clientrequest.ClientRequestExecutorPool;
import voldemort.store.system.SystemStoreConstants;
import voldemort.store.views.ViewStorageConfiguration;
import voldemort.utils.ByteArray;
import voldemort.utils.ByteUtils;
import voldemort.utils.ClusterUtils;
import voldemort.utils.MetadataVersionStoreUtils;
import voldemort.utils.NetworkClassLoader;
import voldemort.utils.Pair;
import voldemort.utils.RebalanceUtils;
import voldemort.utils.StoreDefinitionUtils;
import voldemort.utils.Utils;
import voldemort.versioning.VectorClock;
import voldemort.versioning.Version;
import voldemort.versioning.Versioned;
import voldemort.xml.ClusterMapper;
import voldemort.xml.StoreDefinitionsMapper;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import com.google.protobuf.Message;
/**
* AdminClient is intended for administrative functionality that is useful and
* often needed, but should be used sparingly (if at all) at the application
* level.
* <p>
* Some of the uses of AdminClient include
* <ul>
* <li>Extraction of data for backups</li>
* <li>Extraction of all keys</li>
* <li>Bulk loading entries</li>
* <li>Migrating partitions</li>
* <li>Get/Update metadata info from selective Nodes</li>
* <li>Used extensively by rebalancing (dynamic node addition/deletion) feature
* (presently in development).</li>
* </ul>
*
*/
public class AdminClient {
// Shared logger for the admin client and all of its operation facades.
private static final Logger logger = Logger.getLogger(AdminClient.class);
// XML mappers; clusterMapper parses the cluster.xml fetched while bootstrapping.
private static final ClusterMapper clusterMapper = new ClusterMapper();
private static final StoreDefinitionsMapper storeMapper = new StoreDefinitionsMapper();
// Parameters for exponential back off
private static final long INITIAL_DELAY = 250; // Starting poll delay in ms; doubled before each Thread.sleep in waitForCompletion
// NOTE(review): the two PRINT_STATS_* constants are not referenced in the
// visible part of this file; presumably used by streaming code further down.
private static final long PRINT_STATS_THRESHOLD = 10000;
private static final long PRINT_STATS_INTERVAL = 5 * 60 * 1000; // 5 minutes (in milliseconds)
// NOTE(review): presumably the key under which the cluster.xml version is
// tracked in the metadata version store -- confirm against its users.
private static final String CLUSTER_VERSION_KEY = "cluster.xml";
// Zone id cached for the system store client when a constructor is not
// given an explicit zone.
private static final int DEFAULT_ZONE_ID = 0;
// Storage engine type names (read-only and view). NOTE(review): presumably
// the engines that restore operations must skip -- confirm in RestoreOperations.
public final static List<String> restoreStoreEngineBlackList = Arrays.asList(ReadOnlyStorageConfiguration.TYPE_NAME,
ViewStorageConfiguration.TYPE_NAME);
// Maps error codes carried in protobuf responses back to exceptions.
private final ErrorCodeMapper errorMapper;
// Pool of admin sockets used by RPCOperations.sendAndReceive.
private final SocketPool socketPool;
private final AdminStoreClient adminStoreClient;
// Used to dump filter class bytes when encoding a VoldemortFilter.
private final NetworkClassLoader networkClassLoader;
private final AdminClientConfig adminClientConfig;
// Cluster view used to resolve node ids; replaceable via setAdminClientCluster.
private Cluster currentCluster;
// Created (and re-created) on demand by HelperOperations.initSystemStoreClient.
private SystemStore<String, String> sysStoreVersion = null;
// Parameters remembered by cacheSystemStoreParams; the system store client
// is only created once the URLs are non-null and the zone id is >= 0.
private String[] cachedBootstrapURLs = null;
private int cachedZoneID = -1;
// Operation facades: inner-class instances bound to this client, created in
// the private constructor.
final public AdminClient.HelperOperations helperOps;
final public AdminClient.RPCOperations rpcOps;
final public AdminClient.MetadataManagementOperations metadataMgmtOps;
final public AdminClient.StoreManagementOperations storeMgmtOps;
final public AdminClient.StoreMaintenanceOperations storeMntOps;
final public AdminClient.BulkStreamingFetchOperations bulkFetchOps;
final public AdminClient.StreamingOperations streamingOps;
final public AdminClient.StoreOperations storeOps;
final public AdminClient.RestoreOperations restoreOps;
final public AdminClient.RebalancingOperations rebalanceOps;
final public AdminClient.ReadOnlySpecificOperations readonlyOps;
/**
* Common initialization of AdminClient.
*
* @param adminClientConfig Client configuration for SocketPool-based
* operations.
* @param clientConfig Client configurations for
* ClientRequestExecutorPool-based operations via the (private)
* AdminStoreClient.
*/
private AdminClient(AdminClientConfig adminClientConfig, ClientConfig clientConfig) {
this.helperOps = this.new HelperOperations();
this.rpcOps = this.new RPCOperations();
this.metadataMgmtOps = this.new MetadataManagementOperations();
this.storeMgmtOps = this.new StoreManagementOperations();
this.storeMntOps = this.new StoreMaintenanceOperations();
this.bulkFetchOps = this.new BulkStreamingFetchOperations();
this.streamingOps = this.new StreamingOperations();
this.storeOps = this.new StoreOperations();
this.restoreOps = this.new RestoreOperations();
this.rebalanceOps = this.new RebalancingOperations();
this.readonlyOps = this.new ReadOnlySpecificOperations();
this.errorMapper = new ErrorCodeMapper();
this.networkClassLoader = new NetworkClassLoader(Thread.currentThread()
.getContextClassLoader());
this.adminClientConfig = adminClientConfig;
this.socketPool = helperOps.createSocketPool(adminClientConfig);
this.adminStoreClient = new AdminStoreClient(clientConfig);
}
/**
* Create an instance of AdminClient given a URL of a node in the cluster.
* The bootstrap URL is used to get the cluster metadata.
*
* @param bootstrapURL URL pointing to the bootstrap node
* @param adminClientConfig Configuration for AdminClient specifying client
* parameters eg. <br>
* <ul>
* <t>
* <li>number of threads</li>
* <li>number of sockets per node</li>
* <li>socket buffer size</li>
* </ul>
*/
public AdminClient(String bootstrapURL,
AdminClientConfig adminClientConfig,
ClientConfig clientConfig) {
this(adminClientConfig, clientConfig);
this.currentCluster = helperOps.getClusterFromBootstrapURL(bootstrapURL);
helperOps.cacheSystemStoreParams(bootstrapURL, DEFAULT_ZONE_ID);
}
/**
 * Creates an AdminClient from an already initialized {@link Cluster}.
 * The socket endpoint of node 0 is remembered (with the default zone id) as
 * the bootstrap URL for the system store client.
 *
 * @param cluster Initialized cluster object, describing the nodes we wish
 *        to contact
 * @param adminClientConfig Configuration for AdminClient specifying client
 *        parameters, e.g.
 *        <ul>
 *        <li>number of threads</li>
 *        <li>number of sockets per node</li>
 *        <li>socket buffer size</li>
 *        </ul>
 * @param clientConfig Client configuration passed on to the internal
 *        AdminStoreClient
 */
public AdminClient(Cluster cluster,
                   AdminClientConfig adminClientConfig,
                   ClientConfig clientConfig) {
    this(adminClientConfig, clientConfig);
    this.currentCluster = cluster;

    // Derive a tcp:// bootstrap URL from node 0 of the supplied cluster.
    Node bootstrapNode = cluster.getNodeById(0);
    StringBuilder url = new StringBuilder("tcp://");
    url.append(bootstrapNode.getHost());
    url.append(":");
    url.append(bootstrapNode.getSocketPort());
    helperOps.cacheSystemStoreParams(url.toString(), DEFAULT_ZONE_ID);
}
/**
 * Same as the bootstrap-URL constructor, but with an explicit primary zone
 * for the system store client.
 *
 * @param bootstrapURL URL pointing to the bootstrap node
 * @param adminClientConfig Configuration for AdminClient specifying client
 *        parameters, e.g.
 *        <ul>
 *        <li>number of threads</li>
 *        <li>number of sockets per node</li>
 *        <li>socket buffer size</li>
 *        </ul>
 * @param clientConfig Client configuration passed on to the internal
 *        AdminStoreClient
 * @param zoneID The primary Zone ID for the purpose of the SystemStore
 */
public AdminClient(String bootstrapURL,
                   AdminClientConfig adminClientConfig,
                   ClientConfig clientConfig,
                   int zoneID) {
    this(bootstrapURL, adminClientConfig, clientConfig);
    // The delegated constructor cached the default zone; replace it with
    // the zone requested by the caller.
    this.helperOps.cacheSystemStoreParams(bootstrapURL, zoneID);
}
/**
 * Shuts the AdminClient down by closing the socket pool and then the
 * internal store client.
 */
public void close() {
    socketPool.close();
    adminStoreClient.close();
}
/**
 * Replaces the cluster definition this AdminClient works against.
 *
 * @param cluster The cluster to use from now on
 */
public void setAdminClientCluster(Cluster cluster) {
    currentCluster = cluster;
}
/**
 * Returns the cluster definition this AdminClient currently works against.
 *
 * @return The current cluster being used by the admin client
 */
public Cluster getAdminClientCluster() {
    return this.currentCluster;
}
/**
* Encapsulates helper methods used across the admin client
*
*/
public class HelperOperations {
/**
 * Remembers the parameters for the internal system store client. The
 * cached values are used every time the system store client is
 * (re)initialized, which is useful when the cluster.xml changes.
 *
 * @param bootstrapURL The URL to bootstrap from
 * @param zoneID The primary zone of the system store client
 */
private void cacheSystemStoreParams(String bootstrapURL, int zoneID) {
    // Stored as a single-element array, the form SystemStore is given.
    AdminClient.this.cachedBootstrapURLs = new String[] { bootstrapURL };
    AdminClient.this.cachedZoneID = zoneID;
}
/**
 * Creates a system store client for the metadata version store from the
 * cached bootstrap URLs and zone id.
 * <p>
 * Does nothing when no parameters have been cached yet. A failure to
 * create the client is deliberately not propagated; it is logged at debug
 * level together with its cause, and the previous client (possibly
 * {@code null}) stays in place.
 */
private void initSystemStoreClient() {
    if(AdminClient.this.cachedBootstrapURLs == null || AdminClient.this.cachedZoneID < 0) {
        return;
    }
    try {
        sysStoreVersion = new SystemStore<String, String>(SystemStoreConstants.SystemStoreName.voldsys$_metadata_version_persistence.name(),
                                                          AdminClient.this.cachedBootstrapURLs,
                                                          AdminClient.this.cachedZoneID);
    } catch(Exception e) {
        // Best effort: keep going, but keep the cause so the failure can
        // be diagnosed from the logs.
        logger.debug("Error while creating a system store client for metadata version store.",
                     e);
    }
}
/**
 * Fetches cluster.xml from the given bootstrap URL and parses it.
 * <p>
 * A temporary {@link SocketStoreClientFactory} is used for the fetch and
 * is always closed, even when bootstrapping fails, so its threads and
 * sockets are not leaked.
 *
 * @param bootstrapURL URL of the node to bootstrap metadata from
 * @return The cluster described by the fetched cluster.xml
 */
private Cluster getClusterFromBootstrapURL(String bootstrapURL) {
    ClientConfig config = new ClientConfig();
    // try to bootstrap metadata from bootstrapUrl
    config.setBootstrapUrls(bootstrapURL);
    SocketStoreClientFactory factory = new SocketStoreClientFactory(config);
    String clusterXml;
    try {
        // get Cluster from bootStrapUrl
        clusterXml = factory.bootstrapMetadataWithRetries(MetadataStore.CLUSTER_KEY,
                                                          factory.validateUrls(config.getBootstrapUrls()));
    } finally {
        // release all threads/sockets held by the factory, on success
        // and on failure alike.
        factory.close();
    }
    return clusterMapper.readCluster(new StringReader(clusterXml), false);
}
/**
 * Builds the admin socket pool from the given configuration. The
 * connection and socket timeouts are configured in seconds and are handed
 * to the pool in milliseconds.
 *
 * @param config Admin client configuration to read the pool settings from
 * @return A new socket pool
 */
private SocketPool createSocketPool(AdminClientConfig config) {
    int connectionTimeoutMs = (int) TimeUnit.SECONDS.toMillis(config.getAdminConnectionTimeoutSec());
    int socketTimeoutMs = (int) TimeUnit.SECONDS.toMillis(config.getAdminSocketTimeoutSec());
    return new SocketPool(config.getMaxConnectionsPerNode(),
                          connectionTimeoutMs,
                          socketTimeoutMs,
                          config.getAdminSocketBufferSize(),
                          config.getAdminSocketKeepAlive());
}
/**
 * Closes the given socket, logging (rather than propagating) any failure.
 *
 * @param socket The socket to close
 */
private void close(Socket socket) {
    try {
        socket.close();
    } catch(IOException e) {
        // Closing is best effort, but keep the cause in the log.
        logger.warn("Failed to close socket", e);
    }
}
// TODO: (refactor) Move this helper method to ClusterInstance
/**
 * For a particular node, finds out all the [replica, partition] tuples it
 * needs to steal in order to be brought back to normal state. Donor nodes
 * are chosen without any zone preference.
 *
 * @param restoringNode The id of the node which needs to be restored
 * @param cluster The cluster definition
 * @param storeDef The store definition to use
 * @return Map of node id to map of replica type and corresponding
 *         partition list
 */
public Map<Integer, HashMap<Integer, List<Integer>>> getReplicationMapping(int restoringNode,
                                                                           Cluster cluster,
                                                                           StoreDefinition storeDef) {
    // -1 tells the zone-aware overload that any zone is acceptable.
    final int noZonePreference = -1;
    return getReplicationMapping(restoringNode, cluster, storeDef, noZonePreference);
}
// TODO: (refactor) Move this helper method to ClusterInstance
/**
 * For a particular node, finds out all the [replica, partition] tuples it
 * needs to steal in order to be brought back to normal state.
 *
 * @param restoringNode The id of the node which needs to be restored
 * @param cluster The cluster definition
 * @param storeDef The store definition to use
 * @param zoneId zone from which donor nodes are chosen, -1 means no zone
 *        preference
 * @return Map of node id to map of replica type and corresponding
 *         partition list
 * @throws VoldemortException if a replicating partition list has at most
 *         one entry, or if removing the restoring node's partitions
 *         leaves no replica to fetch from
 */
public Map<Integer, HashMap<Integer, List<Integer>>> getReplicationMapping(int restoringNode,
                                                                           Cluster cluster,
                                                                           StoreDefinition storeDef,
                                                                           int zoneId) {
    Map<Integer, HashMap<Integer, List<Integer>>> donors = Maps.newHashMap();
    RoutingStrategy routing = new RoutingStrategyFactory().updateRoutingStrategy(storeDef,
                                                                                 cluster);
    List<Integer> ownedByRestoringNode = cluster.getNodeById(restoringNode)
                                                .getPartitionIds();

    // Walk every partition in the cluster; only those whose replicating
    // list touches the restoring node need a donor.
    for(Node clusterNode: cluster.getNodes()) {
        for(int primaryPartition: clusterNode.getPartitionIds()) {
            List<Integer> remaining = routing.getReplicatingPartitionList(primaryPartition);
            // Untouched copy, taken before the restoring node's
            // partitions are stripped from 'remaining'.
            List<Integer> fullReplicaList = Lists.newArrayList(remaining);

            if(remaining.size() <= 1) {
                throw new VoldemortException("Store "
                                             + storeDef.getName()
                                             + " cannot be restored from replica because replication factor = 1");
            }

            // removeAll reports whether the list changed, i.e. whether
            // the restoring node held one of the replicas.
            boolean touchesRestoringNode = remaining.removeAll(ownedByRestoringNode);
            if(!touchesRestoringNode) {
                continue;
            }
            if(remaining.isEmpty()) {
                throw new VoldemortException("Found a case where-in the overlap of "
                                             + "the node partition list results in no replicas "
                                             + "being left in replicating list");
            }
            addDonorWithZonePreference(remaining,
                                       fullReplicaList,
                                       donors,
                                       zoneId,
                                       cluster,
                                       storeDef);
        }
    }
    return donors;
}
// TODO: (refactor) Move this helper method to ClusterInstance
/**
 * For a partition that needs to be restored, finds a donor node that owns
 * one of the remaining replicas AND lives in the requested zone, and
 * records the partition under that donor in {@code donorMap}.
 * <p>
 * The remaining replicas are tried in order; the first one whose owning
 * node matches the zone wins. A zone id of -1 means no zone preference.
 *
 * @param remainderPartitions The replicating partitions without the ones
 *        owned by the restore node
 * @param originalPartitions The entire replicating partition list
 *        (including the ones owned by the restore node); its first
 *        entry is the partition recorded for the donor
 * @param donorMap All donor nodes that will be fetched from, keyed by
 *        node id, then by replica type; updated in place
 * @param zoneId The zone from which donor nodes will be chosen; -1 means
 *        all zones are fine
 * @param cluster The cluster metadata
 * @param storeDef The store to be restored
 * @throws VoldemortException if no remaining replica is owned by a node
 *         in the requested zone
 */
private void addDonorWithZonePreference(List<Integer> remainderPartitions,
                                        List<Integer> originalPartitions,
                                        Map<Integer, HashMap<Integer, List<Integer>>> donorMap,
                                        int zoneId,
                                        Cluster cluster,
                                        StoreDefinition storeDef) {
    Map<Integer, Integer> partitionToNodeId = ClusterUtils.getCurrentPartitionMapping(cluster);

    // Resolve the partition up front so that the failure message below
    // names it instead of printing the -1 placeholder.
    int partition = originalPartitions.isEmpty() ? -1 : originalPartitions.get(0);
    int nodeId = -1;
    int replicaType = -1;
    boolean found = false;

    for(Integer candidate: remainderPartitions) {
        // The replica type is the candidate's position in the full list.
        replicaType = originalPartitions.indexOf(candidate);
        nodeId = partitionToNodeId.get(candidate);
        if(-1 == zoneId || cluster.getNodeById(nodeId).getZoneId() == zoneId) {
            found = true;
            break;
        }
    }
    if(!found) {
        throw new VoldemortException("unable to find a node to fetch partition "
                                     + partition + " of replica type " + replicaType
                                     + " for store " + storeDef.getName());
    }

    HashMap<Integer, List<Integer>> replicaToPartitionList = donorMap.get(nodeId);
    if(replicaToPartitionList == null) {
        replicaToPartitionList = Maps.newHashMap();
        donorMap.put(nodeId, replicaToPartitionList);
    }
    List<Integer> partitions = replicaToPartitionList.get(replicaType);
    if(partitions == null) {
        partitions = Lists.newArrayList();
        replicaToPartitionList.put(replicaType, partitions);
    }
    partitions.add(partition);
}
/**
 * Converts an error carried in a protobuf response into the matching
 * exception and throws it.
 *
 * @param error The error reported by the remote node
 */
public void throwException(VProto.Error error) {
    short errorCode = (short) error.getErrorCode();
    String errorMessage = error.getErrorMessage();
    throw AdminClient.this.errorMapper.getError(errorCode, errorMessage);
}
/**
 * Packs a filter into its protobuf form: the filter's class name plus the
 * bytes of its class as dumped by the network class loader.
 *
 * @param filter The filter to encode
 * @return The protobuf representation of the filter
 * @throws IOException declared for failures while dumping the class bytes
 */
private VAdminProto.VoldemortFilter encodeFilter(VoldemortFilter filter) throws IOException {
    Class<?> filterClass = filter.getClass();
    ByteArray classDefinition = new ByteArray(networkClassLoader.dumpClass(filterClass));

    VAdminProto.VoldemortFilter.Builder encoded = VAdminProto.VoldemortFilter.newBuilder();
    encoded.setName(filterClass.getName());
    encoded.setData(ProtoUtils.encodeBytes(classDefinition));
    return encoded.build();
}
// TODO: (refactor) It is weird that a helper method invokes
// metadataMgmtOps.getRemoteStoreDefList. Refactor this method to split
// some of the functionality into ClusterInstance, and then move this
// method to metadataMgmtOps. Or, do the refactoring wrt ClusterInstance
// and change the method interface to require storeDef rather than
// storeName to avoid doing a metadata operation...
/**
 * Converts a list of partitions to a map of replica type to partition
 * list. Every replica type from 0 up to the store's replication factor
 * (exclusive) is mapped to the same given list.
 * <p>
 * The store definition is looked up among the store definitions fetched
 * from the given node plus the system store definitions.
 *
 * @param nodeId Node which is donating data
 * @param storeName Name of store
 * @param partitions List of partitions ( primary OR replicas ) to move
 * @return Map of replica type to partitions
 */
private HashMap<Integer, List<Integer>> getReplicaToPartitionMap(int nodeId,
                                                                 String storeName,
                                                                 List<Integer> partitions) {
    List<StoreDefinition> candidateDefs = metadataMgmtOps.getRemoteStoreDefList(nodeId)
                                                         .getValue();
    candidateDefs.addAll(SystemStoreConstants.getAllSystemStoreDefs());
    StoreDefinition storeDef = StoreDefinitionUtils.getStoreDefinitionWithName(candidateDefs,
                                                                               storeName);

    HashMap<Integer, List<Integer>> byReplicaType = Maps.newHashMap();
    int replicaType = 0;
    while(replicaType < storeDef.getReplicationFactor()) {
        byReplicaType.put(replicaType, partitions);
        replicaType++;
    }
    return byReplicaType;
}
}
/**
* Encapsulates all the RPC helper methods
*
*/
public class RPCOperations {
/**
 * Sends one admin request to a node and reads the reply into the given
 * builder, using a socket checked out from the pool.
 * <p>
 * On an I/O failure the socket is closed and the error is rethrown as a
 * {@link VoldemortException}. The socket is checked back into the pool in
 * every case.
 *
 * @param nodeId Id of the node to talk to (resolved via the current
 *        cluster)
 * @param message The request to send
 * @param builder Builder that receives the response
 * @return The builder passed in, as returned by the protobuf reader
 */
private <T extends Message.Builder> T sendAndReceive(int nodeId, Message message, T builder) {
    Node target = AdminClient.this.getAdminClientCluster().getNodeById(nodeId);
    SocketDestination destination = new SocketDestination(target.getHost(),
                                                          target.getAdminPort(),
                                                          RequestFormatType.ADMIN_PROTOCOL_BUFFERS);
    SocketAndStreams connection = socketPool.checkout(destination);
    try {
        DataOutputStream toNode = connection.getOutputStream();
        DataInputStream fromNode = connection.getInputStream();

        ProtoUtils.writeMessage(toNode, message);
        toNode.flush();
        return ProtoUtils.readToBuilder(fromNode, builder);
    } catch(IOException ioe) {
        helperOps.close(connection.getSocket());
        throw new VoldemortException(ioe);
    } finally {
        socketPool.checkin(destination, connection);
    }
}
/**
 * Get the status of an Async Operation running at (remote) node.
 *
 * <b>If the operation is complete, then the operation will be removed
 * from a list of currently running operations.</b>
 *
 * @param nodeId Id of the node on which the operation is running
 * @param requestId Id of the operation itself
 * @return The status of the operation
 */
public AsyncOperationStatus getAsyncRequestStatus(int nodeId, int requestId) {
    VAdminProto.AsyncOperationStatusRequest.Builder statusRequest = VAdminProto.AsyncOperationStatusRequest.newBuilder();
    statusRequest.setRequestId(requestId);

    VAdminProto.VoldemortAdminRequest.Builder envelope = VAdminProto.VoldemortAdminRequest.newBuilder();
    envelope.setType(VAdminProto.AdminRequestType.ASYNC_OPERATION_STATUS);
    envelope.setAsyncOperationStatus(statusRequest.build());

    VAdminProto.AsyncOperationStatusResponse.Builder reply = rpcOps.sendAndReceive(nodeId,
                                                                                   envelope.build(),
                                                                                   VAdminProto.AsyncOperationStatusResponse.newBuilder());
    if(reply.hasError()) {
        helperOps.throwException(reply.getError());
    }

    // Copy the protobuf fields into the client-side status object.
    AsyncOperationStatus result = new AsyncOperationStatus(reply.getRequestId(),
                                                           reply.getDescription());
    result.setStatus(reply.getStatus());
    result.setComplete(reply.getComplete());
    return result;
}
/**
 * Retrieves a list of asynchronous request ids on the server, leaving out
 * the completed requests.
 *
 * @param nodeId The id of the node whose request ids we want
 * @return List of async request ids
 */
public List<Integer> getAsyncRequestList(int nodeId) {
    final boolean includeCompleted = false;
    return getAsyncRequestList(nodeId, includeCompleted);
}
/**
 * Retrieves a list of asynchronous request ids on the server. Depending
 * on the boolean passed, the completed requests are included as well.
 *
 * @param nodeId The id of the node whose request ids we want
 * @param showComplete Whether to include the completed requests as well
 * @return List of async request ids
 */
public List<Integer> getAsyncRequestList(int nodeId, boolean showComplete) {
    VAdminProto.AsyncOperationListRequest.Builder listRequest = VAdminProto.AsyncOperationListRequest.newBuilder();
    listRequest.setShowComplete(showComplete);

    VAdminProto.VoldemortAdminRequest.Builder envelope = VAdminProto.VoldemortAdminRequest.newBuilder();
    envelope.setType(VAdminProto.AdminRequestType.ASYNC_OPERATION_LIST);
    envelope.setAsyncOperationList(listRequest.build());

    VAdminProto.AsyncOperationListResponse.Builder reply = rpcOps.sendAndReceive(nodeId,
                                                                                 envelope.build(),
                                                                                 VAdminProto.AsyncOperationListResponse.newBuilder());
    if(reply.hasError()) {
        helperOps.throwException(reply.getError());
    }
    return reply.getRequestIdsList();
}
/**
 * Asks a node to stop one of its asynchronous requests.
 *
 * @param nodeId The id of the node on which the request is running
 * @param requestId The id of the request to terminate
 */
public void stopAsyncRequest(int nodeId, int requestId) {
    VAdminProto.AsyncOperationStopRequest.Builder stopRequest = VAdminProto.AsyncOperationStopRequest.newBuilder();
    stopRequest.setRequestId(requestId);

    VAdminProto.VoldemortAdminRequest.Builder envelope = VAdminProto.VoldemortAdminRequest.newBuilder();
    envelope.setType(VAdminProto.AdminRequestType.ASYNC_OPERATION_STOP);
    envelope.setAsyncOperationStop(stopRequest.build());

    VAdminProto.AsyncOperationStopResponse.Builder reply = rpcOps.sendAndReceive(nodeId,
                                                                                 envelope.build(),
                                                                                 VAdminProto.AsyncOperationStopResponse.newBuilder());
    if(reply.hasError()) {
        helperOps.throwException(reply.getError());
    }
}
/**
 * Wait for async task at (remote) nodeId to finish completion, using
 * exponential backoff to poll the task completion status.
 * <p>
 *
 * <i>Logs the status at info level whenever the reported status text
 * changes.</i>
 * <p>
 * The polling delay is doubled before every sleep and never exceeds the
 * configured maximum backoff delay once it has grown past the initial
 * delay.
 *
 * @param nodeId Id of the node to poll
 * @param requestId Id of the request to check
 * @param maxWait Maximum time we'll keep checking a request until we
 *        give up
 * @param timeUnit Unit in which maxWait is expressed.
 * @param higherStatus A higher level async operation object. If this
 *        waiting is being run inside another async operation this
 *        helps us propagate the status all the way up. May be null.
 * @return The final status string reported for the completed task
 * @throws VoldemortException if the task failed to finish in the
 *         specified maxWait time, if polling the status failed or the
 *         task reported an exception, or if the waiting thread was
 *         interrupted (the interrupt flag is restored in that case)
 */
public String waitForCompletion(int nodeId,
                                int requestId,
                                long maxWait,
                                TimeUnit timeUnit,
                                AsyncOperationStatus higherStatus) {
    long delay = INITIAL_DELAY;
    long waitUntil = System.currentTimeMillis() + timeUnit.toMillis(maxWait);
    String description = null;
    String oldStatus = "";
    while(System.currentTimeMillis() < waitUntil) {
        try {
            AsyncOperationStatus status = getAsyncRequestStatus(nodeId, requestId);
            if(!status.getStatus().equalsIgnoreCase(oldStatus))
                logger.info("Status from node " + nodeId + " (" + status.getDescription()
                            + ") - " + status.getStatus());
            oldStatus = status.getStatus();
            if(higherStatus != null) {
                higherStatus.setStatus("Status from node " + nodeId + " ("
                                       + status.getDescription() + ") - "
                                       + status.getStatus());
            }
            description = status.getDescription();
            if(status.hasException())
                throw status.getException();
            if(status.isComplete())
                return status.getStatus();
        } catch(Exception e) {
            throw new VoldemortException("Failed while waiting for async task ("
                                         + description + ") at node " + nodeId
                                         + " to finish", e);
        }

        // Exponential backoff, capped at the configured maximum.
        long maxDelay = adminClientConfig.getMaxBackoffDelayMs();
        if(delay < maxDelay)
            delay = Math.min(delay << 1, maxDelay);
        try {
            Thread.sleep(delay);
        } catch(InterruptedException e) {
            // With the interrupt flag restored every further sleep would
            // return immediately, turning this loop into a busy poll of
            // the remote node; give up instead.
            Thread.currentThread().interrupt();
            throw new VoldemortException("Interrupted while waiting for async task ("
                                         + description + ") at node " + nodeId
                                         + " to finish", e);
        }
    }
    throw new VoldemortException("Failed to finish task requestId: " + requestId
                                 + " in maxWait " + maxWait + " " + timeUnit.toString());
}
/**
 * Wait for async task at (remote) nodeId to finish completion, using
 * exponential backoff to poll the task completion status. No higher level
 * status object is updated while waiting.
 *
 * @param nodeId Id of the node to poll
 * @param requestId Id of the request to check
 * @param maxWait Maximum time we'll keep checking a request until we
 *        give up
 * @param timeUnit Unit in which maxWait is expressed.
 * @return Whatever the five-argument overload returns for the task
 * @throws VoldemortException if task failed to finish in specified
 *         maxWait time.
 */
public String waitForCompletion(int nodeId, int requestId, long maxWait, TimeUnit timeUnit) {
    final AsyncOperationStatus noHigherStatus = null;
    return waitForCompletion(nodeId, requestId, maxWait, timeUnit, noHigherStatus);
}
/**
 * Wait till the passed value matches with the metadata value returned by
 * the remote node for the passed key, polling with exponential backoff.
 * <p>
 *
 * <i>Logs the current value at debug level after each unsuccessful
 * check.</i>
 * <p>
 * The polling delay is doubled before every sleep and never exceeds the
 * configured maximum backoff delay once it has grown past the initial
 * delay.
 *
 * @param nodeId Id of the node to poll
 * @param key metadata key to keep checking for current value
 * @param value metadata value should match for exit criteria.
 * @param maxWait Maximum time we'll keep checking a request until we
 *        give up
 * @param timeUnit Unit in which maxWait is expressed.
 * @throws VoldemortException if the value did not match within maxWait,
 *         or if the waiting thread was interrupted (the interrupt flag
 *         is restored in that case)
 */
public void waitForCompletion(int nodeId,
                              String key,
                              String value,
                              long maxWait,
                              TimeUnit timeUnit) {
    long delay = INITIAL_DELAY;
    long waitUntil = System.currentTimeMillis() + timeUnit.toMillis(maxWait);
    while(System.currentTimeMillis() < waitUntil) {
        String currentValue = metadataMgmtOps.getRemoteMetadata(nodeId, key).getValue();
        if(value.equals(currentValue))
            return;
        logger.debug("waiting for value " + value + " for metadata key " + key
                     + " from remote node " + nodeId + " currentValue " + currentValue);

        // Exponential backoff, capped at the configured maximum.
        long maxDelay = adminClientConfig.getMaxBackoffDelayMs();
        if(delay < maxDelay)
            delay = Math.min(delay << 1, maxDelay);
        try {
            Thread.sleep(delay);
        } catch(InterruptedException e) {
            // With the interrupt flag restored every further sleep would
            // return immediately, turning this loop into a busy poll of
            // the remote node; give up instead.
            Thread.currentThread().interrupt();
            throw new VoldemortException("Interrupted while waiting for value " + value
                                         + " for metadata key " + key
                                         + " at remote node " + nodeId, e);
        }
    }
    throw new VoldemortException("Failed to get matching value " + value + " for key "
                                 + key + " at remote node " + nodeId + " in maximum wait "
                                 + maxWait + " " + timeUnit.toString() + " time.");
}
}
/**
* Encapsulates all operations that deal with cluster.xml and stores.xml
*
*/
public class MetadataManagementOperations {
/**
* Update the metadata version for the given key (cluster or store). The
* new value set is the current timestamp.
*
* @param versionKey The metadata key for which Version should be
* incremented
*/
public void updateMetadataversion(String versionKey) {
    helperOps.initSystemStoreClient();

    Properties versionProps = MetadataVersionStoreUtils.getProperties(AdminClient.this.sysStoreVersion);
    boolean hasExistingVersion = versionProps != null
                                 && versionProps.getProperty(versionKey) != null;

    long updatedVersion;
    if(hasExistingVersion) {
        // A version is already recorded for this key: replace it with "now".
        logger.debug("Version obtained = " + versionProps.getProperty(versionKey));
        updatedVersion = System.currentTimeMillis();
    } else {
        // Nothing recorded yet: start from 0, creating the property set if
        // the store returned none at all.
        logger.debug("Current version is null. Assuming version 0.");
        updatedVersion = 0;
        if(versionProps == null) {
            versionProps = new Properties();
        }
    }

    versionProps.setProperty(versionKey, String.valueOf(updatedVersion));
    MetadataVersionStoreUtils.setProperties(AdminClient.this.sysStoreVersion, versionProps);
}
/**
* Set the metadata versions to the given set
*
* @param newProperties The new metadata versions to be set across all
* the nodes in the cluster
*/
public void setMetadataversion(Properties newProperties) {
    // Initialise the system store client first, then write the supplied
    // properties to the metadata version store as-is.
    helperOps.initSystemStoreClient();
    MetadataVersionStoreUtils.setProperties(AdminClient.this.sysStoreVersion,
                                            newProperties);
}
/**
* Update metadata at the given remoteNodeId.
* <p>
*
* Metadata keys can be one of {@link MetadataStore#METADATA_KEYS}<br>
* eg.<br>
* <li>cluster metadata (cluster.xml as string)
* <li>stores definitions (stores.xml as string)
* <li>Server states <br> <br>
* See {@link voldemort.store.metadata.MetadataStore} for more
* information.
*
* @param remoteNodeId Id of the node
* @param key Metadata key to update
* @param value Value for the metadata key
*/
public void updateRemoteMetadata(int remoteNodeId, String key, Versioned<String> value) {
    // Encode the key and the payload as UTF-8 bytes; the version travels
    // with the payload unchanged.
    byte[] rawKey = ByteUtils.getBytes(key, "UTF-8");
    ByteArray keyBytes = new ByteArray(rawKey);
    byte[] rawValue = ByteUtils.getBytes(value.getValue(), "UTF-8");
    Versioned<byte[]> valueBytes = new Versioned<byte[]>(rawValue, value.getVersion());

    // Build the UPDATE_METADATA payload, then wrap it in the admin request.
    VAdminProto.UpdateMetadataRequest updateRequest = VAdminProto.UpdateMetadataRequest.newBuilder()
                                                                                       .setKey(ByteString.copyFrom(keyBytes.get()))
                                                                                       .setVersioned(ProtoUtils.encodeVersioned(valueBytes))
                                                                                       .build();
    VAdminProto.VoldemortAdminRequest adminRequest = VAdminProto.VoldemortAdminRequest.newBuilder()
                                                                                      .setType(VAdminProto.AdminRequestType.UPDATE_METADATA)
                                                                                      .setUpdateMetadata(updateRequest)
                                                                                      .build();

    VAdminProto.UpdateMetadataResponse.Builder reply = rpcOps.sendAndReceive(remoteNodeId,
                                                                             adminRequest,
                                                                             VAdminProto.UpdateMetadataResponse.newBuilder());
    // Hand any error reported in the response to the shared exception helper.
    if(reply.hasError()) {
        helperOps.throwException(reply.getError());
    }
}
/**
* Wrapper around the single-node updateRemoteMetadata function. It loops
* over the given list of nodes and applies the update to each of them in
* turn. If the key is the cluster version key, it also updates the
* version of the corresponding metadata in the system store.
* <p>
*
* Metadata keys can be one of {@link MetadataStore#METADATA_KEYS}<br>
* eg.<br>
* <li>cluster metadata (cluster.xml as string)
* <li>stores definitions (stores.xml as string)
* <li>Server states <br> <br>
* See {@link voldemort.store.metadata.MetadataStore} for more
* information.
*
* @param remoteNodeIds Ids of the nodes to update
* @param key Metadata key to update
* @param value Value for the metadata key
*
* */
public void updateRemoteMetadata(List<Integer> remoteNodeIds,
                                 String key,
                                 Versioned<String> value) {
    for(Integer currentNodeId: remoteNodeIds) {
        // Announce the target as "host:id" on stdout before each update.
        StringBuilder announcement = new StringBuilder("Setting ");
        announcement.append(key).append(" for ");
        announcement.append(getAdminClientCluster().getNodeById(currentNodeId).getHost());
        announcement.append(":");
        announcement.append(getAdminClientCluster().getNodeById(currentNodeId).getId());
        System.out.println(announcement.toString());

        updateRemoteMetadata(currentNodeId, key, value);
    }

    // Every node was updated without an exception. Only the cluster version
    // key triggers a metadata version update; any other key ends here.
    if(!key.equals(CLUSTER_VERSION_KEY)) {
        return;
    }
    metadataMgmtOps.updateMetadataversion(key);
}
/**
* Get the metadata on a remote node.
* <p>
* Metadata keys can be one of {@link MetadataStore#METADATA_KEYS}<br>
* eg.<br>
* <li>cluster metadata (cluster.xml as string)
* <li>stores definitions (stores.xml as string)
* <li>Server states <br> <br>
* See {@link voldemort.store.metadata.MetadataStore} for more
* information.
*
* @param remoteNodeId Id of the node
* @param key Metadata key to retrieve
* @return Metadata with its associated
* {@link voldemort.versioning.Version}
*/
public Versioned<String> getRemoteMetadata(int remoteNodeId, String key) {
    // The key is sent as UTF-8 bytes.
    ByteArray keyBytes = new ByteArray(ByteUtils.getBytes(key, "UTF-8"));

    VAdminProto.GetMetadataRequest.Builder metadataQuery = VAdminProto.GetMetadataRequest.newBuilder()
                                                                                         .setKey(ByteString.copyFrom(keyBytes.get()));
    VAdminProto.VoldemortAdminRequest adminRequest = VAdminProto.VoldemortAdminRequest.newBuilder()
                                                                                      .setType(VAdminProto.AdminRequestType.GET_METADATA)
                                                                                      .setGetMetadata(metadataQuery)
                                                                                      .build();

    VAdminProto.GetMetadataResponse.Builder reply = rpcOps.sendAndReceive(remoteNodeId,
                                                                          adminRequest,
                                                                          VAdminProto.GetMetadataResponse.newBuilder());
    // Hand any error reported in the response to the shared exception helper.
    if(reply.hasError()) {
        helperOps.throwException(reply.getError());
    }

    // Decode the versioned bytes and convert the payload back to a UTF-8
    // string, keeping the version as returned.
    Versioned<byte[]> rawMetadata = ProtoUtils.decodeVersioned(reply.getVersion());
    String decodedValue = ByteUtils.getString(rawMetadata.getValue(), "UTF-8");
    return new Versioned<String>(decodedValue, rawMetadata.getVersion());
}
/**
* Update the cluster information {@link MetadataStore#CLUSTER_KEY} on a
* remote node.
* <p>
*
* @param nodeId Id of the remote node
* @param cluster The new cluster object
* @throws VoldemortException
*/
public void updateRemoteCluster(int nodeId, Cluster cluster, Version clock)
        throws VoldemortException {
    // Turn the cluster into its string form via clusterMapper, attach the
    // supplied version, and store it under the cluster metadata key.
    String serializedCluster = clusterMapper.writeCluster(cluster);
    Versioned<String> versionedCluster = new Versioned<String>(serializedCluster, clock);
    updateRemoteMetadata(nodeId, MetadataStore.CLUSTER_KEY, versionedCluster);
}
/**
* Get the cluster information from a remote node.
* <p>
*
* @param nodeId Node to retrieve information from
* @return A cluster object with its
* {@link voldemort.versioning.Version}
* @throws VoldemortException
*/
public Versioned<Cluster> getRemoteCluster(int nodeId) throws VoldemortException {
    // Fetch the cluster metadata string from the node.
    Versioned<String> clusterMetadata = metadataMgmtOps.getRemoteMetadata(nodeId,
                                                                          MetadataStore.CLUSTER_KEY);

    // Parse it into a Cluster (second readCluster argument is false) and
    // pair the result with the version the node returned.
    StringReader clusterReader = new StringReader(clusterMetadata.getValue());
    Cluster parsedCluster = clusterMapper.readCluster(clusterReader, false);
    return new Versioned<Cluster>(parsedCluster, clusterMetadata.getVersion());
}
/**
* Update the store definitions on a remote node.
* <p>
*
* @param nodeId The node id of the machine
* @param storesList The new store list
* @throws VoldemortException
*/
public void updateRemoteStoreDefList(int nodeId, List<StoreDefinition> storesList)
throws VoldemortException {
// get current version.
VectorClock oldClock = (VectorClock) metadataMgmtOps.getRemoteStoreDefList(nodeId)
.getVersion();
updateRemoteMetadata(nodeId,
MetadataStore.STORES_KEY,
new Versioned<String>(storeMapper.writeStoreList(storesList),