(ByteUtils.getBytes(value.getValue(),
+ "UTF-8"),
+ value.getVersion());
+
+ VProto.KeyedVersions.Builder keyedVersion = VProto.KeyedVersions.newBuilder()
+ .setKey(ProtoUtils.encodeBytes(keyBytes));
+ keyedVersion.addVersions(ProtoUtils.encodeVersioned(valueBytes));
+ allKeyVersions.add(keyedVersion.build());
+
+ }
VAdminProto.VoldemortAdminRequest request = VAdminProto.VoldemortAdminRequest.newBuilder()
.setType(VAdminProto.AdminRequestType.UPDATE_METADATA)
.setUpdateMetadata(VAdminProto.UpdateMetadataRequest.newBuilder()
- .setKey(ByteString.copyFrom(keyBytes.get()))
- .setVersioned(ProtoUtils.encodeVersioned(valueBytes))
+ .addAllMetadataEntry(allKeyVersions)
+
.build())
.build();
VAdminProto.UpdateMetadataResponse.Builder response = rpcOps.sendAndReceive(remoteNodeId,
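[Editorial note] The UPDATE_METADATA request now carries a repeated KeyedVersions list, so several metadata keys (for example cluster.xml and stores.xml) can be updated in a single round trip. A minimal sketch of how a caller might assemble that list, using the same helpers as the patch above; the metadataKeyValues map and variable names are illustrative, not part of the patch:

    // Illustrative only -- metadataKeyValues is a hypothetical Map<String, Versioned<String>>
    List<VProto.KeyedVersions> allKeyVersions = new ArrayList<VProto.KeyedVersions>();
    for(Map.Entry<String, Versioned<String>> entry: metadataKeyValues.entrySet()) {
        ByteArray keyBytes = new ByteArray(ByteUtils.getBytes(entry.getKey(), "UTF-8"));
        Versioned<byte[]> valueBytes = new Versioned<byte[]>(ByteUtils.getBytes(entry.getValue().getValue(), "UTF-8"),
                                                             entry.getValue().getVersion());
        allKeyVersions.add(VProto.KeyedVersions.newBuilder()
                                               .setKey(ProtoUtils.encodeBytes(keyBytes))
                                               .addVersions(ProtoUtils.encodeVersioned(valueBytes))
                                               .build());
    }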
@@ -1961,6 +1991,8 @@ public SocketStore getSocketStore(int nodeId, String storeName) {
SocketStore newSocketStore = null;
try {
+            // IGNORE_CHECKS will not work unless the request
+            // format is protobuf
newSocketStore = clientPool.create(storeName,
node.getHost(),
node.getSocketPort(),
@@ -2118,7 +2150,8 @@ public void updateEntries(int nodeId,
}
/**
- * Fetch key/value tuples belonging to a node with given key values
+ * Fetch key/value tuples from a given server, directly from the storage
+ * engine
*
*
* Entries are being queried synchronously
@@ -2139,7 +2172,6 @@ public Iterator queryKeys(int nodeId,
try {
store = adminStoreClient.getSocketStore(nodeId, storeName);
-
} catch(Exception e) {
throw new VoldemortException(e);
}
@@ -2391,6 +2423,8 @@ public Versioned getRemoteRebalancerState(int nodeId) {
*/
public void rebalanceStateChange(Cluster existingCluster,
Cluster transitionCluster,
+ List<StoreDefinition> existingStoreDefs,
+ List<StoreDefinition> targetStoreDefs,
List<RebalancePartitionsInfo> rebalancePartitionPlanList,
boolean swapRO,
boolean changeClusterMetadata,
@@ -2410,6 +2444,7 @@ public void rebalanceStateChange(Cluster existingCluster,
try {
individualStateChange(nodeId,
transitionCluster,
+ targetStoreDefs,
stealerNodeToPlan.get(nodeId),
swapRO,
changeClusterMetadata,
@@ -2454,6 +2489,7 @@ public void rebalanceStateChange(Cluster existingCluster,
try {
individualStateChange(completedNodeId,
existingCluster,
+ existingStoreDefs,
stealerNodeToPlan.get(completedNodeId),
swapRO,
changeClusterMetadata,
@@ -2493,6 +2529,7 @@ public void rebalanceStateChange(Cluster existingCluster,
*/
private void individualStateChange(int nodeId,
Cluster cluster,
+ List<StoreDefinition> storeDefs,
List<RebalancePartitionsInfo> rebalancePartitionPlanList,
boolean swapRO,
boolean changeClusterMetadata,
@@ -2531,6 +2568,7 @@ private void individualStateChange(int nodeId,
.setChangeRebalanceState(changeRebalanceState)
.setClusterString(clusterMapper.writeCluster(cluster))
.setRollback(rollback)
+ .setStoresString(new StoreDefinitionsMapper().writeStoreList(storeDefs))
.build();
VAdminProto.VoldemortAdminRequest adminRequest = VAdminProto.VoldemortAdminRequest.newBuilder()
.setRebalanceStateChange(getRebalanceStateChangeRequest)
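[Editorial note] The rebalance state-change request now also ships the store definitions as XML, written with StoreDefinitionsMapper. As a rough sketch of the server-side counterpart (assumed usage, not part of this patch, relying on the mapper's readStoreList(Reader) overload):

    // Illustrative only: reading the new stores_string field back into store definitions
    List<StoreDefinition> storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(request.getStoresString()));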
diff --git a/src/java/voldemort/client/protocol/admin/StreamingClient.java b/src/java/voldemort/client/protocol/admin/StreamingClient.java
index 50430d2c40..a1c0320f37 100644
--- a/src/java/voldemort/client/protocol/admin/StreamingClient.java
+++ b/src/java/voldemort/client/protocol/admin/StreamingClient.java
@@ -51,21 +51,19 @@
/**
*
- * @author anagpal
*
- * The streaming API allows for send events into voldemort stores in the
- * async fashion. All the partition and replication logic will be taken
- * care of internally.
+ * The streaming API allows for sending events into Voldemort stores in an
+ * asynchronous fashion. All the partition and replication logic will be taken
+ * care of internally.
*
- * The users is expected to provide two callbacks, one for performing
- * period checkpoints and one for recovering the streaming process from
- * the last checkpoint.
+ * The user is expected to provide two callbacks, one for performing periodic
+ * checkpoints and one for recovering the streaming process from the last
+ * checkpoint.
*
- * NOTE: The API is not thread safe, since if multiple threads use this
- * API we cannot make any guarantees about correctness of the
- * checkpointing mechanism.
+ * NOTE: The API is not thread safe: if multiple threads use this API, we
+ * cannot make any guarantees about the correctness of the checkpointing mechanism.
*
- * Right now we expect this to used by a single thread per data source
+ * Right now we expect this to be used by a single thread per data source
*
*/
public class StreamingClient {
@@ -91,9 +89,6 @@ public class StreamingClient {
// Every batch size we commit
private static int CHECKPOINT_COMMIT_SIZE;
- // TODO
- // provide knobs to tune this
- private static int TIME_COMMIT_SIZE = 30;
// we have to throttle to a certain qps
private static int THROTTLE_QPS;
private int entriesProcessed;
@@ -504,14 +499,12 @@ public synchronized void streamingPut(ByteArray key, Versioned value, St
} catch(InterruptedException e1) {
MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
+ logger.error("Recovery Callback failed", e1);
throw new VoldemortException("Recovery Callback failed");
} catch(ExecutionException e1) {
MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
- throw new VoldemortException("Recovery Callback failed");
+ logger.error("Recovery Callback failed during execution", e1);
+ throw new VoldemortException("Recovery Callback failed during execution");
}
e.printStackTrace();
@@ -519,12 +512,9 @@ public synchronized void streamingPut(ByteArray key, Versioned value, St
}
- int secondsTime = calendar.get(Calendar.SECOND);
- if(entriesProcessed == CHECKPOINT_COMMIT_SIZE || secondsTime % TIME_COMMIT_SIZE == 0) {
+ if(entriesProcessed == CHECKPOINT_COMMIT_SIZE) {
entriesProcessed = 0;
-
commitToVoldemort();
-
}
throttler.maybeThrottle(1);
@@ -542,6 +532,45 @@ public synchronized void commitToVoldemort() {
commitToVoldemort(storeNames);
}
+ /**
+ * Reset the streaming session by unmarking it as bad
+ */
+ public void unmarkBad() {
+ MARKED_BAD = false;
+ }
+
+ /**
+ * Mark a node as blacklisted
+ *
+ * @param nodeId Integer node id of the node to be blacklisted
+ */
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ public void blacklistNode(int nodeId) {
+ Collection nodesInCluster = adminClient.getAdminClientCluster().getNodes();
+
+ if(blackListedNodes == null) {
+ blackListedNodes = new ArrayList();
+ }
+ blackListedNodes.add(nodeId);
+
+ for(Node node: nodesInCluster) {
+
+ if(node.getId() == nodeId) {
+ nodesToStream.remove(node);
+ break;
+ }
+
+ }
+
+ for(String store: storeNames) {
+ SocketAndStreams sands = nodeIdStoreToSocketAndStreams.get(new Pair(store, nodeId));
+ close(sands.getSocket());
+ SocketDestination destination = nodeIdStoreToSocketRequest.get(new Pair(store, nodeId));
+ streamingSocketPool.checkin(destination, sands);
+ }
+ }
+
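[Editorial note] A short usage sketch for the two new session controls above; streamingClient and failedNodeId are placeholder names, not part of the patch:

    // Illustrative only: drop a persistently failing node from an active session,
    // then clear the bad flag set by an earlier recovery callback
    streamingClient.blacklistNode(failedNodeId); // its sockets are closed and checked back into the pool
    streamingClient.unmarkBad();                 // allows the streaming session to continue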
/**
* Flush the network buffer and write all entries to the server, then wait
* for an ack from the server. This is a blocking call. It is invoked on
@@ -557,6 +586,8 @@ private void commitToVoldemort(List storeNamesToCommit) {
if(logger.isDebugEnabled()) {
logger.debug("Trying to commit to Voldemort");
}
+
+ boolean hasError = false;
for(Node node: nodesToStream) {
for(String store: storeNamesToCommit) {
@@ -576,64 +607,48 @@ private void commitToVoldemort(List storeNamesToCommit) {
VAdminProto.UpdatePartitionEntriesResponse.Builder updateResponse = ProtoUtils.readToBuilder(inputStream,
VAdminProto.UpdatePartitionEntriesResponse.newBuilder());
if(updateResponse.hasError()) {
- logger.warn("Invoking the Recovery Callback");
- Future future = streamingresults.submit(recoveryCallback);
- try {
- future.get();
-
- } catch(InterruptedException e1) {
- MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
- throw new VoldemortException("Recovery Callback failed");
- } catch(ExecutionException e1) {
- MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
- throw new VoldemortException("Recovery Callback failed");
- }
- } else {
- if(logger.isDebugEnabled()) {
- logger.debug("Commit successful");
- logger.debug("calling checkpoint callback");
- }
- Future future = streamingresults.submit(checkpointCallback);
- try {
- future.get();
-
- } catch(InterruptedException e1) {
-
- logger.warn("Checkpoint callback failed!");
- e1.printStackTrace();
- } catch(ExecutionException e1) {
- logger.warn("Checkpoint callback failed!");
- e1.printStackTrace();
- }
+ hasError = true;
}
} catch(IOException e) {
+ logger.error("Exception during commit", e);
+ hasError = true;
+ }
+ }
- logger.warn("Invoking the Recovery Callback");
- Future future = streamingresults.submit(recoveryCallback);
- try {
- future.get();
-
- } catch(InterruptedException e1) {
- MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
- throw new VoldemortException("Recovery Callback failed");
- } catch(ExecutionException e1) {
- MARKED_BAD = true;
- logger.error("Recovery Callback failed");
- e1.printStackTrace();
- throw new VoldemortException("Recovery Callback failed");
- }
+ }
- e.printStackTrace();
- }
+ // Invoke the callbacks once per commit, rather than once per node and store, to remove redundant callbacks
+ if(hasError) {
+
+ logger.warn("Invoking the Recovery Callback");
+ Future future = streamingresults.submit(recoveryCallback);
+ try {
+ future.get();
+
+ } catch(InterruptedException e1) {
+ MARKED_BAD = true;
+ logger.error("Recovery Callback failed", e1);
+ throw new VoldemortException("Recovery Callback failed");
+ } catch(ExecutionException e1) {
+ MARKED_BAD = true;
+ logger.error("Recovery Callback failed during execution", e1);
+ throw new VoldemortException("Recovery Callback failed during execution");
+ }
+ } else {
+ if(logger.isDebugEnabled()) {
+ logger.debug("Commit successful");
+ logger.debug("calling checkpoint callback");
}
+ Future future = streamingresults.submit(checkpointCallback);
+ try {
+ future.get();
+ } catch(InterruptedException e1) {
+ logger.warn("Checkpoint callback failed!", e1);
+ } catch(ExecutionException e1) {
+ logger.warn("Checkpoint callback failed during execution!", e1);
+ }
}
}
@@ -654,11 +669,9 @@ public synchronized void closeStreamingSessions(Callable resetCheckpointCallback
future.get();
} catch(InterruptedException e1) {
- // TODO Auto-generated catch block
- e1.printStackTrace();
+ logger.warn("Reset check point interrupted" + e1);
} catch(ExecutionException e1) {
- // TODO Auto-generated catch block
- e1.printStackTrace();
+ logger.warn("Reset check point interrupted during execution" + e1);
}
}
diff --git a/src/java/voldemort/client/protocol/pb/ProtoUtils.java b/src/java/voldemort/client/protocol/pb/ProtoUtils.java
index 7eef3359b0..ede67945a5 100644
--- a/src/java/voldemort/client/protocol/pb/ProtoUtils.java
+++ b/src/java/voldemort/client/protocol/pb/ProtoUtils.java
@@ -31,6 +31,7 @@
import voldemort.client.protocol.pb.VAdminProto.PerStorePartitionTuple;
import voldemort.client.protocol.pb.VAdminProto.ROStoreVersionDirMap;
import voldemort.client.protocol.pb.VAdminProto.RebalancePartitionInfoMap;
+import voldemort.client.protocol.pb.VProto.KeyedVersions;
import voldemort.client.rebalance.RebalancePartitionsInfo;
import voldemort.store.ErrorCodeMapper;
import voldemort.utils.ByteArray;
@@ -198,6 +199,17 @@ public static Versioned decodeVersioned(VProto.Versioned versioned) {
decodeClock(versioned.getVersion()));
}
+ /**
+ * Given the list of value versions for a metadata key, we are only
+ * interested in the value at index 0. This is because even when we have to
+ * update cluster.xml, we marshall a single key into a versioned list.
+ * Hence we just look at the value at index 0.
+ *
+ */
+ public static Versioned<byte[]> decodeVersionedMetadataKeyValue(KeyedVersions keyValue) {
+ return decodeVersioned(keyValue.getVersions(0));
+ }
+
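[Editorial note] A sketch of how the decoder above might be used when handling an UpdateMetadataRequest on the server side; the loop and variable names are illustrative, not part of this patch, and assume the existing ProtoUtils.decodeBytes helper:

    // Illustrative only: walk the repeated metadataEntry field and decode each value
    for(VProto.KeyedVersions keyValue: request.getMetadataEntryList()) {
        ByteArray key = ProtoUtils.decodeBytes(keyValue.getKey());
        Versioned<byte[]> value = ProtoUtils.decodeVersionedMetadataKeyValue(keyValue);
        // apply (key, value) to the metadata store here
    }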
public static List<Versioned<byte[]>> decodeVersions(List<VProto.Versioned> versioned) {
List<Versioned<byte[]>> values = new ArrayList<Versioned<byte[]>>(versioned.size());
for(VProto.Versioned v: versioned)
diff --git a/src/java/voldemort/client/protocol/pb/VAdminProto.java b/src/java/voldemort/client/protocol/pb/VAdminProto.java
index e34aec3563..cca0257143 100644
--- a/src/java/voldemort/client/protocol/pb/VAdminProto.java
+++ b/src/java/voldemort/client/protocol/pb/VAdminProto.java
@@ -821,38 +821,32 @@ public UpdateMetadataRequest getDefaultInstanceForType() {
return voldemort.client.protocol.pb.VAdminProto.internal_static_voldemort_UpdateMetadataRequest_fieldAccessorTable;
}
- // required bytes key = 1;
- public static final int KEY_FIELD_NUMBER = 1;
- private boolean hasKey;
- private com.google.protobuf.ByteString key_ = com.google.protobuf.ByteString.EMPTY;
- public boolean hasKey() { return hasKey; }
- public com.google.protobuf.ByteString getKey() { return key_; }
-
- // required .voldemort.Versioned versioned = 2;
- public static final int VERSIONED_FIELD_NUMBER = 2;
- private boolean hasVersioned;
- private voldemort.client.protocol.pb.VProto.Versioned versioned_;
- public boolean hasVersioned() { return hasVersioned; }
- public voldemort.client.protocol.pb.VProto.Versioned getVersioned() { return versioned_; }
+ // repeated .voldemort.KeyedVersions metadataEntry = 1;
+ public static final int METADATAENTRY_FIELD_NUMBER = 1;
+ private java.util.List metadataEntry_ =
+ java.util.Collections.emptyList();
+ public java.util.List getMetadataEntryList() {
+ return metadataEntry_;
+ }
+ public int getMetadataEntryCount() { return metadataEntry_.size(); }
+ public voldemort.client.protocol.pb.VProto.KeyedVersions getMetadataEntry(int index) {
+ return metadataEntry_.get(index);
+ }
private void initFields() {
- versioned_ = voldemort.client.protocol.pb.VProto.Versioned.getDefaultInstance();
}
public final boolean isInitialized() {
- if (!hasKey) return false;
- if (!hasVersioned) return false;
- if (!getVersioned().isInitialized()) return false;
+ for (voldemort.client.protocol.pb.VProto.KeyedVersions element : getMetadataEntryList()) {
+ if (!element.isInitialized()) return false;
+ }
return true;
}
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
getSerializedSize();
- if (hasKey()) {
- output.writeBytes(1, getKey());
- }
- if (hasVersioned()) {
- output.writeMessage(2, getVersioned());
+ for (voldemort.client.protocol.pb.VProto.KeyedVersions element : getMetadataEntryList()) {
+ output.writeMessage(1, element);
}
getUnknownFields().writeTo(output);
}
@@ -863,13 +857,9 @@ public int getSerializedSize() {
if (size != -1) return size;
size = 0;
- if (hasKey()) {
+ for (voldemort.client.protocol.pb.VProto.KeyedVersions element : getMetadataEntryList()) {
size += com.google.protobuf.CodedOutputStream
- .computeBytesSize(1, getKey());
- }
- if (hasVersioned()) {
- size += com.google.protobuf.CodedOutputStream
- .computeMessageSize(2, getVersioned());
+ .computeMessageSize(1, element);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
@@ -1013,6 +1003,10 @@ public voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest buildParti
throw new IllegalStateException(
"build() has already been called on this Builder.");
}
+ if (result.metadataEntry_ != java.util.Collections.EMPTY_LIST) {
+ result.metadataEntry_ =
+ java.util.Collections.unmodifiableList(result.metadataEntry_);
+ }
voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest returnMe = result;
result = null;
return returnMe;
@@ -1029,11 +1023,11 @@ public Builder mergeFrom(com.google.protobuf.Message other) {
public Builder mergeFrom(voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest other) {
if (other == voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest.getDefaultInstance()) return this;
- if (other.hasKey()) {
- setKey(other.getKey());
- }
- if (other.hasVersioned()) {
- mergeVersioned(other.getVersioned());
+ if (!other.metadataEntry_.isEmpty()) {
+ if (result.metadataEntry_.isEmpty()) {
+ result.metadataEntry_ = new java.util.ArrayList();
+ }
+ result.metadataEntry_.addAll(other.metadataEntry_);
}
this.mergeUnknownFields(other.getUnknownFields());
return this;
@@ -1061,16 +1055,9 @@ public Builder mergeFrom(
break;
}
case 10: {
- setKey(input.readBytes());
- break;
- }
- case 18: {
- voldemort.client.protocol.pb.VProto.Versioned.Builder subBuilder = voldemort.client.protocol.pb.VProto.Versioned.newBuilder();
- if (hasVersioned()) {
- subBuilder.mergeFrom(getVersioned());
- }
+ voldemort.client.protocol.pb.VProto.KeyedVersions.Builder subBuilder = voldemort.client.protocol.pb.VProto.KeyedVersions.newBuilder();
input.readMessage(subBuilder, extensionRegistry);
- setVersioned(subBuilder.buildPartial());
+ addMetadataEntry(subBuilder.buildPartial());
break;
}
}
@@ -1078,61 +1065,54 @@ public Builder mergeFrom(
}
- // required bytes key = 1;
- public boolean hasKey() {
- return result.hasKey();
+ // repeated .voldemort.KeyedVersions metadataEntry = 1;
+ public java.util.List getMetadataEntryList() {
+ return java.util.Collections.unmodifiableList(result.metadataEntry_);
}
- public com.google.protobuf.ByteString getKey() {
- return result.getKey();
+ public int getMetadataEntryCount() {
+ return result.getMetadataEntryCount();
}
- public Builder setKey(com.google.protobuf.ByteString value) {
+ public voldemort.client.protocol.pb.VProto.KeyedVersions getMetadataEntry(int index) {
+ return result.getMetadataEntry(index);
+ }
+ public Builder setMetadataEntry(int index, voldemort.client.protocol.pb.VProto.KeyedVersions value) {
if (value == null) {
- throw new NullPointerException();
- }
- result.hasKey = true;
- result.key_ = value;
+ throw new NullPointerException();
+ }
+ result.metadataEntry_.set(index, value);
return this;
}
- public Builder clearKey() {
- result.hasKey = false;
- result.key_ = getDefaultInstance().getKey();
+ public Builder setMetadataEntry(int index, voldemort.client.protocol.pb.VProto.KeyedVersions.Builder builderForValue) {
+ result.metadataEntry_.set(index, builderForValue.build());
return this;
}
-
- // required .voldemort.Versioned versioned = 2;
- public boolean hasVersioned() {
- return result.hasVersioned();
- }
- public voldemort.client.protocol.pb.VProto.Versioned getVersioned() {
- return result.getVersioned();
- }
- public Builder setVersioned(voldemort.client.protocol.pb.VProto.Versioned value) {
+ public Builder addMetadataEntry(voldemort.client.protocol.pb.VProto.KeyedVersions value) {
if (value == null) {
throw new NullPointerException();
}
- result.hasVersioned = true;
- result.versioned_ = value;
+ if (result.metadataEntry_.isEmpty()) {
+ result.metadataEntry_ = new java.util.ArrayList();
+ }
+ result.metadataEntry_.add(value);
return this;
}
- public Builder setVersioned(voldemort.client.protocol.pb.VProto.Versioned.Builder builderForValue) {
- result.hasVersioned = true;
- result.versioned_ = builderForValue.build();
+ public Builder addMetadataEntry(voldemort.client.protocol.pb.VProto.KeyedVersions.Builder builderForValue) {
+ if (result.metadataEntry_.isEmpty()) {
+ result.metadataEntry_ = new java.util.ArrayList();
+ }
+ result.metadataEntry_.add(builderForValue.build());
return this;
}
- public Builder mergeVersioned(voldemort.client.protocol.pb.VProto.Versioned value) {
- if (result.hasVersioned() &&
- result.versioned_ != voldemort.client.protocol.pb.VProto.Versioned.getDefaultInstance()) {
- result.versioned_ =
- voldemort.client.protocol.pb.VProto.Versioned.newBuilder(result.versioned_).mergeFrom(value).buildPartial();
- } else {
- result.versioned_ = value;
+ public Builder addAllMetadataEntry(
+ java.lang.Iterable extends voldemort.client.protocol.pb.VProto.KeyedVersions> values) {
+ if (result.metadataEntry_.isEmpty()) {
+ result.metadataEntry_ = new java.util.ArrayList();
}
- result.hasVersioned = true;
+ super.addAll(values, result.metadataEntry_);
return this;
}
- public Builder clearVersioned() {
- result.hasVersioned = false;
- result.versioned_ = voldemort.client.protocol.pb.VProto.Versioned.getDefaultInstance();
+ public Builder clearMetadataEntry() {
+ result.metadataEntry_ = java.util.Collections.emptyList();
return this;
}
@@ -18185,29 +18165,36 @@ public voldemort.client.protocol.pb.VAdminProto.RebalancePartitionInfoMap getReb
public boolean hasClusterString() { return hasClusterString; }
public java.lang.String getClusterString() { return clusterString_; }
- // required bool swap_ro = 3;
- public static final int SWAP_RO_FIELD_NUMBER = 3;
+ // required string stores_string = 3;
+ public static final int STORES_STRING_FIELD_NUMBER = 3;
+ private boolean hasStoresString;
+ private java.lang.String storesString_ = "";
+ public boolean hasStoresString() { return hasStoresString; }
+ public java.lang.String getStoresString() { return storesString_; }
+
+ // required bool swap_ro = 4;
+ public static final int SWAP_RO_FIELD_NUMBER = 4;
private boolean hasSwapRo;
private boolean swapRo_ = false;
public boolean hasSwapRo() { return hasSwapRo; }
public boolean getSwapRo() { return swapRo_; }
- // required bool change_cluster_metadata = 4;
- public static final int CHANGE_CLUSTER_METADATA_FIELD_NUMBER = 4;
+ // required bool change_cluster_metadata = 5;
+ public static final int CHANGE_CLUSTER_METADATA_FIELD_NUMBER = 5;
private boolean hasChangeClusterMetadata;
private boolean changeClusterMetadata_ = false;
public boolean hasChangeClusterMetadata() { return hasChangeClusterMetadata; }
public boolean getChangeClusterMetadata() { return changeClusterMetadata_; }
- // required bool change_rebalance_state = 5;
- public static final int CHANGE_REBALANCE_STATE_FIELD_NUMBER = 5;
+ // required bool change_rebalance_state = 6;
+ public static final int CHANGE_REBALANCE_STATE_FIELD_NUMBER = 6;
private boolean hasChangeRebalanceState;
private boolean changeRebalanceState_ = false;
public boolean hasChangeRebalanceState() { return hasChangeRebalanceState; }
public boolean getChangeRebalanceState() { return changeRebalanceState_; }
- // required bool rollback = 6;
- public static final int ROLLBACK_FIELD_NUMBER = 6;
+ // required bool rollback = 7;
+ public static final int ROLLBACK_FIELD_NUMBER = 7;
private boolean hasRollback;
private boolean rollback_ = false;
public boolean hasRollback() { return hasRollback; }
@@ -18217,6 +18204,7 @@ private void initFields() {
}
public final boolean isInitialized() {
if (!hasClusterString) return false;
+ if (!hasStoresString) return false;
if (!hasSwapRo) return false;
if (!hasChangeClusterMetadata) return false;
if (!hasChangeRebalanceState) return false;
@@ -18236,17 +18224,20 @@ public void writeTo(com.google.protobuf.CodedOutputStream output)
if (hasClusterString()) {
output.writeString(2, getClusterString());
}
+ if (hasStoresString()) {
+ output.writeString(3, getStoresString());
+ }
if (hasSwapRo()) {
- output.writeBool(3, getSwapRo());
+ output.writeBool(4, getSwapRo());
}
if (hasChangeClusterMetadata()) {
- output.writeBool(4, getChangeClusterMetadata());
+ output.writeBool(5, getChangeClusterMetadata());
}
if (hasChangeRebalanceState()) {
- output.writeBool(5, getChangeRebalanceState());
+ output.writeBool(6, getChangeRebalanceState());
}
if (hasRollback()) {
- output.writeBool(6, getRollback());
+ output.writeBool(7, getRollback());
}
getUnknownFields().writeTo(output);
}
@@ -18265,21 +18256,25 @@ public int getSerializedSize() {
size += com.google.protobuf.CodedOutputStream
.computeStringSize(2, getClusterString());
}
+ if (hasStoresString()) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeStringSize(3, getStoresString());
+ }
if (hasSwapRo()) {
size += com.google.protobuf.CodedOutputStream
- .computeBoolSize(3, getSwapRo());
+ .computeBoolSize(4, getSwapRo());
}
if (hasChangeClusterMetadata()) {
size += com.google.protobuf.CodedOutputStream
- .computeBoolSize(4, getChangeClusterMetadata());
+ .computeBoolSize(5, getChangeClusterMetadata());
}
if (hasChangeRebalanceState()) {
size += com.google.protobuf.CodedOutputStream
- .computeBoolSize(5, getChangeRebalanceState());
+ .computeBoolSize(6, getChangeRebalanceState());
}
if (hasRollback()) {
size += com.google.protobuf.CodedOutputStream
- .computeBoolSize(6, getRollback());
+ .computeBoolSize(7, getRollback());
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
@@ -18452,6 +18447,9 @@ public Builder mergeFrom(voldemort.client.protocol.pb.VAdminProto.RebalanceState
if (other.hasClusterString()) {
setClusterString(other.getClusterString());
}
+ if (other.hasStoresString()) {
+ setStoresString(other.getStoresString());
+ }
if (other.hasSwapRo()) {
setSwapRo(other.getSwapRo());
}
@@ -18499,19 +18497,23 @@ public Builder mergeFrom(
setClusterString(input.readString());
break;
}
- case 24: {
- setSwapRo(input.readBool());
+ case 26: {
+ setStoresString(input.readString());
break;
}
case 32: {
- setChangeClusterMetadata(input.readBool());
+ setSwapRo(input.readBool());
break;
}
case 40: {
- setChangeRebalanceState(input.readBool());
+ setChangeClusterMetadata(input.readBool());
break;
}
case 48: {
+ setChangeRebalanceState(input.readBool());
+ break;
+ }
+ case 56: {
setRollback(input.readBool());
break;
}
@@ -18592,7 +18594,28 @@ public Builder clearClusterString() {
return this;
}
- // required bool swap_ro = 3;
+ // required string stores_string = 3;
+ public boolean hasStoresString() {
+ return result.hasStoresString();
+ }
+ public java.lang.String getStoresString() {
+ return result.getStoresString();
+ }
+ public Builder setStoresString(java.lang.String value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ result.hasStoresString = true;
+ result.storesString_ = value;
+ return this;
+ }
+ public Builder clearStoresString() {
+ result.hasStoresString = false;
+ result.storesString_ = getDefaultInstance().getStoresString();
+ return this;
+ }
+
+ // required bool swap_ro = 4;
public boolean hasSwapRo() {
return result.hasSwapRo();
}
@@ -18610,7 +18633,7 @@ public Builder clearSwapRo() {
return this;
}
- // required bool change_cluster_metadata = 4;
+ // required bool change_cluster_metadata = 5;
public boolean hasChangeClusterMetadata() {
return result.hasChangeClusterMetadata();
}
@@ -18628,7 +18651,7 @@ public Builder clearChangeClusterMetadata() {
return this;
}
- // required bool change_rebalance_state = 5;
+ // required bool change_rebalance_state = 6;
public boolean hasChangeRebalanceState() {
return result.hasChangeRebalanceState();
}
@@ -18646,7 +18669,7 @@ public Builder clearChangeRebalanceState() {
return this;
}
- // required bool rollback = 6;
+ // required bool rollback = 7;
public boolean hasRollback() {
return result.hasRollback();
}
@@ -23141,198 +23164,198 @@ public Builder clearReserveMemory() {
"emort-client.proto\"!\n\022GetMetadataRequest" +
"\022\013\n\003key\030\001 \002(\014\"]\n\023GetMetadataResponse\022%\n\007" +
"version\030\001 \001(\0132\024.voldemort.Versioned\022\037\n\005e" +
- "rror\030\002 \001(\0132\020.voldemort.Error\"M\n\025UpdateMe" +
- "tadataRequest\022\013\n\003key\030\001 \002(\014\022\'\n\tversioned\030" +
- "\002 \002(\0132\024.voldemort.Versioned\"9\n\026UpdateMet" +
- "adataResponse\022\037\n\005error\030\001 \001(\0132\020.voldemort" +
- ".Error\"7\n\tFileEntry\022\021\n\tfile_name\030\001 \002(\t\022\027" +
- "\n\017file_size_bytes\030\002 \002(\003\"F\n\016PartitionEntr",
- "y\022\013\n\003key\030\001 \002(\014\022\'\n\tversioned\030\002 \002(\0132\024.vold" +
- "emort.Versioned\"\216\001\n\035UpdatePartitionEntri" +
- "esRequest\022\r\n\005store\030\001 \002(\t\0222\n\017partition_en" +
- "try\030\002 \002(\0132\031.voldemort.PartitionEntry\022*\n\006" +
- "filter\030\003 \001(\0132\032.voldemort.VoldemortFilter" +
- "\"A\n\036UpdatePartitionEntriesResponse\022\037\n\005er" +
- "ror\030\001 \001(\0132\020.voldemort.Error\"-\n\017Voldemort" +
- "Filter\022\014\n\004name\030\001 \002(\t\022\014\n\004data\030\002 \002(\014\"\257\001\n\030U" +
- "pdateSlopEntriesRequest\022\r\n\005store\030\001 \002(\t\022\013" +
- "\n\003key\030\002 \002(\014\022\'\n\007version\030\003 \002(\0132\026.voldemort",
- ".VectorClock\022,\n\014request_type\030\004 \002(\0162\026.vol" +
- "demort.RequestType\022\r\n\005value\030\005 \001(\014\022\021\n\ttra" +
- "nsform\030\006 \001(\014\"<\n\031UpdateSlopEntriesRespons" +
- "e\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"d\n\032Fe" +
- "tchPartitionFilesRequest\022\r\n\005store\030\001 \002(\t\022" +
- "7\n\024replica_to_partition\030\002 \003(\0132\031.voldemor" +
- "t.PartitionTuple\"\244\002\n\034FetchPartitionEntri" +
- "esRequest\0227\n\024replica_to_partition\030\001 \003(\0132" +
- "\031.voldemort.PartitionTuple\022\r\n\005store\030\002 \002(" +
- "\t\022*\n\006filter\030\003 \001(\0132\032.voldemort.VoldemortF",
- "ilter\022\024\n\014fetch_values\030\004 \001(\010\022*\n\"OBSOLETE_" +
- "_DO_NOT_USE__skip_records\030\005 \001(\003\022\027\n\017initi" +
- "al_cluster\030\006 \001(\t\022\026\n\016fetch_orphaned\030\007 \001(\010" +
- "\022\035\n\025records_per_partition\030\010 \001(\003\"\201\001\n\035Fetc" +
- "hPartitionEntriesResponse\0222\n\017partition_e" +
- "ntry\030\001 \001(\0132\031.voldemort.PartitionEntry\022\013\n" +
- "\003key\030\002 \001(\014\022\037\n\005error\030\003 \001(\0132\020.voldemort.Er" +
- "ror\"\254\001\n\035DeletePartitionEntriesRequest\022\r\n" +
- "\005store\030\001 \002(\t\0227\n\024replica_to_partition\030\002 \003" +
- "(\0132\031.voldemort.PartitionTuple\022*\n\006filter\030",
- "\003 \001(\0132\032.voldemort.VoldemortFilter\022\027\n\017ini" +
- "tial_cluster\030\004 \001(\t\"P\n\036DeletePartitionEnt" +
- "riesResponse\022\r\n\005count\030\001 \001(\003\022\037\n\005error\030\002 \001" +
- "(\0132\020.voldemort.Error\"\317\001\n\035InitiateFetchAn" +
- "dUpdateRequest\022\017\n\007node_id\030\001 \002(\005\022\r\n\005store" +
- "\030\002 \002(\t\022*\n\006filter\030\003 \001(\0132\032.voldemort.Volde" +
- "mortFilter\0227\n\024replica_to_partition\030\004 \003(\013" +
- "2\031.voldemort.PartitionTuple\022\027\n\017initial_c" +
- "luster\030\005 \001(\t\022\020\n\010optimize\030\006 \001(\010\"1\n\033AsyncO" +
- "perationStatusRequest\022\022\n\nrequest_id\030\001 \002(",
- "\005\"/\n\031AsyncOperationStopRequest\022\022\n\nreques" +
- "t_id\030\001 \002(\005\"=\n\032AsyncOperationStopResponse" +
- "\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"2\n\031Asy" +
- "ncOperationListRequest\022\025\n\rshow_complete\030" +
- "\002 \002(\010\"R\n\032AsyncOperationListResponse\022\023\n\013r" +
- "equest_ids\030\001 \003(\005\022\037\n\005error\030\002 \001(\0132\020.voldem" +
- "ort.Error\":\n\016PartitionTuple\022\024\n\014replica_t" +
- "ype\030\001 \002(\005\022\022\n\npartitions\030\002 \003(\005\"e\n\026PerStor" +
- "ePartitionTuple\022\022\n\nstore_name\030\001 \002(\t\0227\n\024r" +
- "eplica_to_partition\030\002 \003(\0132\031.voldemort.Pa",
- "rtitionTuple\"\370\001\n\031RebalancePartitionInfoM" +
- "ap\022\022\n\nstealer_id\030\001 \002(\005\022\020\n\010donor_id\030\002 \002(\005" +
- "\022\017\n\007attempt\030\003 \002(\005\022C\n\030replica_to_add_part" +
- "ition\030\004 \003(\0132!.voldemort.PerStorePartitio" +
- "nTuple\022F\n\033replica_to_delete_partition\030\005 " +
- "\003(\0132!.voldemort.PerStorePartitionTuple\022\027" +
- "\n\017initial_cluster\030\006 \002(\t\"f\n\034InitiateRebal" +
- "anceNodeRequest\022F\n\030rebalance_partition_i" +
- "nfo\030\001 \002(\0132$.voldemort.RebalancePartition" +
- "InfoMap\"m\n#InitiateRebalanceNodeOnDonorR",
- "equest\022F\n\030rebalance_partition_info\030\001 \003(\013" +
- "2$.voldemort.RebalancePartitionInfoMap\"\212" +
- "\001\n\034AsyncOperationStatusResponse\022\022\n\nreque" +
- "st_id\030\001 \001(\005\022\023\n\013description\030\002 \001(\t\022\016\n\006stat" +
- "us\030\003 \001(\t\022\020\n\010complete\030\004 \001(\010\022\037\n\005error\030\005 \001(" +
- "\0132\020.voldemort.Error\"\'\n\026TruncateEntriesRe" +
- "quest\022\r\n\005store\030\001 \002(\t\":\n\027TruncateEntriesR" +
- "esponse\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error" +
- "\"*\n\017AddStoreRequest\022\027\n\017storeDefinition\030\001" +
- " \002(\t\"3\n\020AddStoreResponse\022\037\n\005error\030\001 \001(\0132",
- "\020.voldemort.Error\"\'\n\022DeleteStoreRequest\022" +
- "\021\n\tstoreName\030\001 \002(\t\"6\n\023DeleteStoreRespons" +
- "e\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"P\n\021Fe" +
- "tchStoreRequest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\ts" +
- "tore_dir\030\002 \002(\t\022\024\n\014push_version\030\003 \001(\003\"9\n\020" +
- "SwapStoreRequest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\t" +
- "store_dir\030\002 \002(\t\"P\n\021SwapStoreResponse\022\037\n\005" +
- "error\030\001 \001(\0132\020.voldemort.Error\022\032\n\022previou" +
- "s_store_dir\030\002 \001(\t\"@\n\024RollbackStoreReques" +
- "t\022\022\n\nstore_name\030\001 \002(\t\022\024\n\014push_version\030\002 ",
- "\002(\003\"8\n\025RollbackStoreResponse\022\037\n\005error\030\001 " +
- "\001(\0132\020.voldemort.Error\"&\n\020RepairJobReques" +
- "t\022\022\n\nstore_name\030\001 \001(\t\"4\n\021RepairJobRespon" +
- "se\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"=\n\024R" +
- "OStoreVersionDirMap\022\022\n\nstore_name\030\001 \002(\t\022" +
- "\021\n\tstore_dir\030\002 \002(\t\"/\n\031GetROMaxVersionDir" +
- "Request\022\022\n\nstore_name\030\001 \003(\t\"y\n\032GetROMaxV" +
- "ersionDirResponse\022:\n\021ro_store_versions\030\001" +
- " \003(\0132\037.voldemort.ROStoreVersionDirMap\022\037\n" +
- "\005error\030\002 \001(\0132\020.voldemort.Error\"3\n\035GetROC",
- "urrentVersionDirRequest\022\022\n\nstore_name\030\001 " +
- "\003(\t\"}\n\036GetROCurrentVersionDirResponse\022:\n" +
- "\021ro_store_versions\030\001 \003(\0132\037.voldemort.ROS" +
- "toreVersionDirMap\022\037\n\005error\030\002 \001(\0132\020.volde" +
- "mort.Error\"/\n\031GetROStorageFormatRequest\022" +
- "\022\n\nstore_name\030\001 \003(\t\"y\n\032GetROStorageForma" +
- "tResponse\022:\n\021ro_store_versions\030\001 \003(\0132\037.v" +
- "oldemort.ROStoreVersionDirMap\022\037\n\005error\030\002" +
- " \001(\0132\020.voldemort.Error\"@\n\027FailedFetchSto" +
- "reRequest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\tstore_d",
- "ir\030\002 \002(\t\";\n\030FailedFetchStoreResponse\022\037\n\005" +
- "error\030\001 \001(\0132\020.voldemort.Error\"\346\001\n\033Rebala" +
- "nceStateChangeRequest\022K\n\035rebalance_parti" +
- "tion_info_list\030\001 \003(\0132$.voldemort.Rebalan" +
- "cePartitionInfoMap\022\026\n\016cluster_string\030\002 \002" +
- "(\t\022\017\n\007swap_ro\030\003 \002(\010\022\037\n\027change_cluster_me" +
- "tadata\030\004 \002(\010\022\036\n\026change_rebalance_state\030\005" +
- " \002(\010\022\020\n\010rollback\030\006 \002(\010\"?\n\034RebalanceState" +
- "ChangeResponse\022\037\n\005error\030\001 \001(\0132\020.voldemor" +
- "t.Error\"G\n DeleteStoreRebalanceStateRequ",
- "est\022\022\n\nstore_name\030\001 \002(\t\022\017\n\007node_id\030\002 \002(\005" +
- "\"D\n!DeleteStoreRebalanceStateResponse\022\037\n" +
- "\005error\030\001 \001(\0132\020.voldemort.Error\"h\n\023Native" +
- "BackupRequest\022\022\n\nstore_name\030\001 \002(\t\022\022\n\nbac" +
- "kup_dir\030\002 \002(\t\022\024\n\014verify_files\030\003 \002(\010\022\023\n\013i" +
- "ncremental\030\004 \002(\010\">\n\024ReserveMemoryRequest" +
- "\022\022\n\nstore_name\030\001 \002(\t\022\022\n\nsize_in_mb\030\002 \002(\003" +
- "\"8\n\025ReserveMemoryResponse\022\037\n\005error\030\001 \001(\013" +
- "2\020.voldemort.Error\"\360\016\n\025VoldemortAdminReq" +
- "uest\022)\n\004type\030\001 \002(\0162\033.voldemort.AdminRequ",
- "estType\0223\n\014get_metadata\030\002 \001(\0132\035.voldemor" +
- "t.GetMetadataRequest\0229\n\017update_metadata\030" +
- "\003 \001(\0132 .voldemort.UpdateMetadataRequest\022" +
- "J\n\030update_partition_entries\030\004 \001(\0132(.vold" +
- "emort.UpdatePartitionEntriesRequest\022H\n\027f" +
- "etch_partition_entries\030\005 \001(\0132\'.voldemort" +
- ".FetchPartitionEntriesRequest\022J\n\030delete_" +
- "partition_entries\030\006 \001(\0132(.voldemort.Dele" +
- "tePartitionEntriesRequest\022K\n\031initiate_fe" +
- "tch_and_update\030\007 \001(\0132(.voldemort.Initiat",
- "eFetchAndUpdateRequest\022F\n\026async_operatio" +
- "n_status\030\010 \001(\0132&.voldemort.AsyncOperatio" +
- "nStatusRequest\022H\n\027initiate_rebalance_nod" +
- "e\030\t \001(\0132\'.voldemort.InitiateRebalanceNod" +
- "eRequest\022B\n\024async_operation_stop\030\n \001(\0132$" +
- ".voldemort.AsyncOperationStopRequest\022B\n\024" +
- "async_operation_list\030\013 \001(\0132$.voldemort.A" +
- "syncOperationListRequest\022;\n\020truncate_ent" +
- "ries\030\014 \001(\0132!.voldemort.TruncateEntriesRe" +
- "quest\022-\n\tadd_store\030\r \001(\0132\032.voldemort.Add",
- "StoreRequest\0223\n\014delete_store\030\016 \001(\0132\035.vol" +
- "demort.DeleteStoreRequest\0221\n\013fetch_store" +
- "\030\017 \001(\0132\034.voldemort.FetchStoreRequest\022/\n\n" +
- "swap_store\030\020 \001(\0132\033.voldemort.SwapStoreRe" +
- "quest\0227\n\016rollback_store\030\021 \001(\0132\037.voldemor" +
- "t.RollbackStoreRequest\022D\n\026get_ro_max_ver" +
- "sion_dir\030\022 \001(\0132$.voldemort.GetROMaxVersi" +
- "onDirRequest\022L\n\032get_ro_current_version_d" +
- "ir\030\023 \001(\0132(.voldemort.GetROCurrentVersion" +
- "DirRequest\022D\n\025fetch_partition_files\030\024 \001(",
- "\0132%.voldemort.FetchPartitionFilesRequest" +
- "\022@\n\023update_slop_entries\030\026 \001(\0132#.voldemor" +
- "t.UpdateSlopEntriesRequest\022>\n\022failed_fet" +
- "ch_store\030\030 \001(\0132\".voldemort.FailedFetchSt" +
- "oreRequest\022C\n\025get_ro_storage_format\030\031 \001(" +
- "\0132$.voldemort.GetROStorageFormatRequest\022" +
- "F\n\026rebalance_state_change\030\032 \001(\0132&.voldem" +
- "ort.RebalanceStateChangeRequest\022/\n\nrepai" +
- "r_job\030\033 \001(\0132\033.voldemort.RepairJobRequest" +
- "\022X\n initiate_rebalance_node_on_donor\030\034 \001",
- "(\0132..voldemort.InitiateRebalanceNodeOnDo" +
- "norRequest\022Q\n\034delete_store_rebalance_sta" +
- "te\030\035 \001(\0132+.voldemort.DeleteStoreRebalanc" +
- "eStateRequest\0225\n\rnative_backup\030\036 \001(\0132\036.v" +
- "oldemort.NativeBackupRequest\0227\n\016reserve_" +
- "memory\030\037 \001(\0132\037.voldemort.ReserveMemoryRe" +
- "quest*\310\005\n\020AdminRequestType\022\020\n\014GET_METADA" +
- "TA\020\000\022\023\n\017UPDATE_METADATA\020\001\022\034\n\030UPDATE_PART" +
- "ITION_ENTRIES\020\002\022\033\n\027FETCH_PARTITION_ENTRI" +
- "ES\020\003\022\034\n\030DELETE_PARTITION_ENTRIES\020\004\022\035\n\031IN",
- "ITIATE_FETCH_AND_UPDATE\020\005\022\032\n\026ASYNC_OPERA" +
- "TION_STATUS\020\006\022\033\n\027INITIATE_REBALANCE_NODE" +
- "\020\007\022\030\n\024ASYNC_OPERATION_STOP\020\010\022\030\n\024ASYNC_OP" +
- "ERATION_LIST\020\t\022\024\n\020TRUNCATE_ENTRIES\020\n\022\r\n\t" +
- "ADD_STORE\020\013\022\020\n\014DELETE_STORE\020\014\022\017\n\013FETCH_S" +
- "TORE\020\r\022\016\n\nSWAP_STORE\020\016\022\022\n\016ROLLBACK_STORE" +
- "\020\017\022\032\n\026GET_RO_MAX_VERSION_DIR\020\020\022\036\n\032GET_RO" +
- "_CURRENT_VERSION_DIR\020\021\022\031\n\025FETCH_PARTITIO" +
- "N_FILES\020\022\022\027\n\023UPDATE_SLOP_ENTRIES\020\024\022\026\n\022FA" +
- "ILED_FETCH_STORE\020\026\022\031\n\025GET_RO_STORAGE_FOR",
- "MAT\020\027\022\032\n\026REBALANCE_STATE_CHANGE\020\030\022\016\n\nREP" +
- "AIR_JOB\020\031\022$\n INITIATE_REBALANCE_NODE_ON_" +
- "DONOR\020\032\022 \n\034DELETE_STORE_REBALANCE_STATE\020" +
- "\033\022\021\n\rNATIVE_BACKUP\020\034\022\022\n\016RESERVE_MEMORY\020\035" +
- "B-\n\034voldemort.client.protocol.pbB\013VAdmin" +
- "ProtoH\001"
+ "rror\030\002 \001(\0132\020.voldemort.Error\"H\n\025UpdateMe" +
+ "tadataRequest\022/\n\rmetadataEntry\030\001 \003(\0132\030.v" +
+ "oldemort.KeyedVersions\"9\n\026UpdateMetadata" +
+ "Response\022\037\n\005error\030\001 \001(\0132\020.voldemort.Erro" +
+ "r\"7\n\tFileEntry\022\021\n\tfile_name\030\001 \002(\t\022\027\n\017fil" +
+ "e_size_bytes\030\002 \002(\003\"F\n\016PartitionEntry\022\013\n\003",
+ "key\030\001 \002(\014\022\'\n\tversioned\030\002 \002(\0132\024.voldemort" +
+ ".Versioned\"\216\001\n\035UpdatePartitionEntriesReq" +
+ "uest\022\r\n\005store\030\001 \002(\t\0222\n\017partition_entry\030\002" +
+ " \002(\0132\031.voldemort.PartitionEntry\022*\n\006filte" +
+ "r\030\003 \001(\0132\032.voldemort.VoldemortFilter\"A\n\036U" +
+ "pdatePartitionEntriesResponse\022\037\n\005error\030\001" +
+ " \001(\0132\020.voldemort.Error\"-\n\017VoldemortFilte" +
+ "r\022\014\n\004name\030\001 \002(\t\022\014\n\004data\030\002 \002(\014\"\257\001\n\030Update" +
+ "SlopEntriesRequest\022\r\n\005store\030\001 \002(\t\022\013\n\003key" +
+ "\030\002 \002(\014\022\'\n\007version\030\003 \002(\0132\026.voldemort.Vect",
+ "orClock\022,\n\014request_type\030\004 \002(\0162\026.voldemor" +
+ "t.RequestType\022\r\n\005value\030\005 \001(\014\022\021\n\ttransfor" +
+ "m\030\006 \001(\014\"<\n\031UpdateSlopEntriesResponse\022\037\n\005" +
+ "error\030\001 \001(\0132\020.voldemort.Error\"d\n\032FetchPa" +
+ "rtitionFilesRequest\022\r\n\005store\030\001 \002(\t\0227\n\024re" +
+ "plica_to_partition\030\002 \003(\0132\031.voldemort.Par" +
+ "titionTuple\"\244\002\n\034FetchPartitionEntriesReq" +
+ "uest\0227\n\024replica_to_partition\030\001 \003(\0132\031.vol" +
+ "demort.PartitionTuple\022\r\n\005store\030\002 \002(\t\022*\n\006" +
+ "filter\030\003 \001(\0132\032.voldemort.VoldemortFilter",
+ "\022\024\n\014fetch_values\030\004 \001(\010\022*\n\"OBSOLETE__DO_N" +
+ "OT_USE__skip_records\030\005 \001(\003\022\027\n\017initial_cl" +
+ "uster\030\006 \001(\t\022\026\n\016fetch_orphaned\030\007 \001(\010\022\035\n\025r" +
+ "ecords_per_partition\030\010 \001(\003\"\201\001\n\035FetchPart" +
+ "itionEntriesResponse\0222\n\017partition_entry\030" +
+ "\001 \001(\0132\031.voldemort.PartitionEntry\022\013\n\003key\030" +
+ "\002 \001(\014\022\037\n\005error\030\003 \001(\0132\020.voldemort.Error\"\254" +
+ "\001\n\035DeletePartitionEntriesRequest\022\r\n\005stor" +
+ "e\030\001 \002(\t\0227\n\024replica_to_partition\030\002 \003(\0132\031." +
+ "voldemort.PartitionTuple\022*\n\006filter\030\003 \001(\013",
+ "2\032.voldemort.VoldemortFilter\022\027\n\017initial_" +
+ "cluster\030\004 \001(\t\"P\n\036DeletePartitionEntriesR" +
+ "esponse\022\r\n\005count\030\001 \001(\003\022\037\n\005error\030\002 \001(\0132\020." +
+ "voldemort.Error\"\317\001\n\035InitiateFetchAndUpda" +
+ "teRequest\022\017\n\007node_id\030\001 \002(\005\022\r\n\005store\030\002 \002(" +
+ "\t\022*\n\006filter\030\003 \001(\0132\032.voldemort.VoldemortF" +
+ "ilter\0227\n\024replica_to_partition\030\004 \003(\0132\031.vo" +
+ "ldemort.PartitionTuple\022\027\n\017initial_cluste" +
+ "r\030\005 \001(\t\022\020\n\010optimize\030\006 \001(\010\"1\n\033AsyncOperat" +
+ "ionStatusRequest\022\022\n\nrequest_id\030\001 \002(\005\"/\n\031",
+ "AsyncOperationStopRequest\022\022\n\nrequest_id\030" +
+ "\001 \002(\005\"=\n\032AsyncOperationStopResponse\022\037\n\005e" +
+ "rror\030\001 \001(\0132\020.voldemort.Error\"2\n\031AsyncOpe" +
+ "rationListRequest\022\025\n\rshow_complete\030\002 \002(\010" +
+ "\"R\n\032AsyncOperationListResponse\022\023\n\013reques" +
+ "t_ids\030\001 \003(\005\022\037\n\005error\030\002 \001(\0132\020.voldemort.E" +
+ "rror\":\n\016PartitionTuple\022\024\n\014replica_type\030\001" +
+ " \002(\005\022\022\n\npartitions\030\002 \003(\005\"e\n\026PerStorePart" +
+ "itionTuple\022\022\n\nstore_name\030\001 \002(\t\0227\n\024replic" +
+ "a_to_partition\030\002 \003(\0132\031.voldemort.Partiti",
+ "onTuple\"\370\001\n\031RebalancePartitionInfoMap\022\022\n" +
+ "\nstealer_id\030\001 \002(\005\022\020\n\010donor_id\030\002 \002(\005\022\017\n\007a" +
+ "ttempt\030\003 \002(\005\022C\n\030replica_to_add_partition" +
+ "\030\004 \003(\0132!.voldemort.PerStorePartitionTupl" +
+ "e\022F\n\033replica_to_delete_partition\030\005 \003(\0132!" +
+ ".voldemort.PerStorePartitionTuple\022\027\n\017ini" +
+ "tial_cluster\030\006 \002(\t\"f\n\034InitiateRebalanceN" +
+ "odeRequest\022F\n\030rebalance_partition_info\030\001" +
+ " \002(\0132$.voldemort.RebalancePartitionInfoM" +
+ "ap\"m\n#InitiateRebalanceNodeOnDonorReques",
+ "t\022F\n\030rebalance_partition_info\030\001 \003(\0132$.vo" +
+ "ldemort.RebalancePartitionInfoMap\"\212\001\n\034As" +
+ "yncOperationStatusResponse\022\022\n\nrequest_id" +
+ "\030\001 \001(\005\022\023\n\013description\030\002 \001(\t\022\016\n\006status\030\003 " +
+ "\001(\t\022\020\n\010complete\030\004 \001(\010\022\037\n\005error\030\005 \001(\0132\020.v" +
+ "oldemort.Error\"\'\n\026TruncateEntriesRequest" +
+ "\022\r\n\005store\030\001 \002(\t\":\n\027TruncateEntriesRespon" +
+ "se\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"*\n\017A" +
+ "ddStoreRequest\022\027\n\017storeDefinition\030\001 \002(\t\"" +
+ "3\n\020AddStoreResponse\022\037\n\005error\030\001 \001(\0132\020.vol",
+ "demort.Error\"\'\n\022DeleteStoreRequest\022\021\n\tst" +
+ "oreName\030\001 \002(\t\"6\n\023DeleteStoreResponse\022\037\n\005" +
+ "error\030\001 \001(\0132\020.voldemort.Error\"P\n\021FetchSt" +
+ "oreRequest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\tstore_" +
+ "dir\030\002 \002(\t\022\024\n\014push_version\030\003 \001(\003\"9\n\020SwapS" +
+ "toreRequest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\tstore" +
+ "_dir\030\002 \002(\t\"P\n\021SwapStoreResponse\022\037\n\005error" +
+ "\030\001 \001(\0132\020.voldemort.Error\022\032\n\022previous_sto" +
+ "re_dir\030\002 \001(\t\"@\n\024RollbackStoreRequest\022\022\n\n" +
+ "store_name\030\001 \002(\t\022\024\n\014push_version\030\002 \002(\003\"8",
+ "\n\025RollbackStoreResponse\022\037\n\005error\030\001 \001(\0132\020" +
+ ".voldemort.Error\"&\n\020RepairJobRequest\022\022\n\n" +
+ "store_name\030\001 \001(\t\"4\n\021RepairJobResponse\022\037\n" +
+ "\005error\030\001 \001(\0132\020.voldemort.Error\"=\n\024ROStor" +
+ "eVersionDirMap\022\022\n\nstore_name\030\001 \002(\t\022\021\n\tst" +
+ "ore_dir\030\002 \002(\t\"/\n\031GetROMaxVersionDirReque" +
+ "st\022\022\n\nstore_name\030\001 \003(\t\"y\n\032GetROMaxVersio" +
+ "nDirResponse\022:\n\021ro_store_versions\030\001 \003(\0132" +
+ "\037.voldemort.ROStoreVersionDirMap\022\037\n\005erro" +
+ "r\030\002 \001(\0132\020.voldemort.Error\"3\n\035GetROCurren",
+ "tVersionDirRequest\022\022\n\nstore_name\030\001 \003(\t\"}" +
+ "\n\036GetROCurrentVersionDirResponse\022:\n\021ro_s" +
+ "tore_versions\030\001 \003(\0132\037.voldemort.ROStoreV" +
+ "ersionDirMap\022\037\n\005error\030\002 \001(\0132\020.voldemort." +
+ "Error\"/\n\031GetROStorageFormatRequest\022\022\n\nst" +
+ "ore_name\030\001 \003(\t\"y\n\032GetROStorageFormatResp" +
+ "onse\022:\n\021ro_store_versions\030\001 \003(\0132\037.voldem" +
+ "ort.ROStoreVersionDirMap\022\037\n\005error\030\002 \001(\0132" +
+ "\020.voldemort.Error\"@\n\027FailedFetchStoreReq" +
+ "uest\022\022\n\nstore_name\030\001 \002(\t\022\021\n\tstore_dir\030\002 ",
+ "\002(\t\";\n\030FailedFetchStoreResponse\022\037\n\005error" +
+ "\030\001 \001(\0132\020.voldemort.Error\"\375\001\n\033RebalanceSt" +
+ "ateChangeRequest\022K\n\035rebalance_partition_" +
+ "info_list\030\001 \003(\0132$.voldemort.RebalancePar" +
+ "titionInfoMap\022\026\n\016cluster_string\030\002 \002(\t\022\025\n" +
+ "\rstores_string\030\003 \002(\t\022\017\n\007swap_ro\030\004 \002(\010\022\037\n" +
+ "\027change_cluster_metadata\030\005 \002(\010\022\036\n\026change" +
+ "_rebalance_state\030\006 \002(\010\022\020\n\010rollback\030\007 \002(\010" +
+ "\"?\n\034RebalanceStateChangeResponse\022\037\n\005erro" +
+ "r\030\001 \001(\0132\020.voldemort.Error\"G\n DeleteStore",
+ "RebalanceStateRequest\022\022\n\nstore_name\030\001 \002(" +
+ "\t\022\017\n\007node_id\030\002 \002(\005\"D\n!DeleteStoreRebalan" +
+ "ceStateResponse\022\037\n\005error\030\001 \001(\0132\020.voldemo" +
+ "rt.Error\"h\n\023NativeBackupRequest\022\022\n\nstore" +
+ "_name\030\001 \002(\t\022\022\n\nbackup_dir\030\002 \002(\t\022\024\n\014verif" +
+ "y_files\030\003 \002(\010\022\023\n\013incremental\030\004 \002(\010\">\n\024Re" +
+ "serveMemoryRequest\022\022\n\nstore_name\030\001 \002(\t\022\022" +
+ "\n\nsize_in_mb\030\002 \002(\003\"8\n\025ReserveMemoryRespo" +
+ "nse\022\037\n\005error\030\001 \001(\0132\020.voldemort.Error\"\360\016\n" +
+ "\025VoldemortAdminRequest\022)\n\004type\030\001 \002(\0162\033.v",
+ "oldemort.AdminRequestType\0223\n\014get_metadat" +
+ "a\030\002 \001(\0132\035.voldemort.GetMetadataRequest\0229" +
+ "\n\017update_metadata\030\003 \001(\0132 .voldemort.Upda" +
+ "teMetadataRequest\022J\n\030update_partition_en" +
+ "tries\030\004 \001(\0132(.voldemort.UpdatePartitionE" +
+ "ntriesRequest\022H\n\027fetch_partition_entries" +
+ "\030\005 \001(\0132\'.voldemort.FetchPartitionEntries" +
+ "Request\022J\n\030delete_partition_entries\030\006 \001(" +
+ "\0132(.voldemort.DeletePartitionEntriesRequ" +
+ "est\022K\n\031initiate_fetch_and_update\030\007 \001(\0132(",
+ ".voldemort.InitiateFetchAndUpdateRequest" +
+ "\022F\n\026async_operation_status\030\010 \001(\0132&.volde" +
+ "mort.AsyncOperationStatusRequest\022H\n\027init" +
+ "iate_rebalance_node\030\t \001(\0132\'.voldemort.In" +
+ "itiateRebalanceNodeRequest\022B\n\024async_oper" +
+ "ation_stop\030\n \001(\0132$.voldemort.AsyncOperat" +
+ "ionStopRequest\022B\n\024async_operation_list\030\013" +
+ " \001(\0132$.voldemort.AsyncOperationListReque" +
+ "st\022;\n\020truncate_entries\030\014 \001(\0132!.voldemort" +
+ ".TruncateEntriesRequest\022-\n\tadd_store\030\r \001",
+ "(\0132\032.voldemort.AddStoreRequest\0223\n\014delete" +
+ "_store\030\016 \001(\0132\035.voldemort.DeleteStoreRequ" +
+ "est\0221\n\013fetch_store\030\017 \001(\0132\034.voldemort.Fet" +
+ "chStoreRequest\022/\n\nswap_store\030\020 \001(\0132\033.vol" +
+ "demort.SwapStoreRequest\0227\n\016rollback_stor" +
+ "e\030\021 \001(\0132\037.voldemort.RollbackStoreRequest" +
+ "\022D\n\026get_ro_max_version_dir\030\022 \001(\0132$.volde" +
+ "mort.GetROMaxVersionDirRequest\022L\n\032get_ro" +
+ "_current_version_dir\030\023 \001(\0132(.voldemort.G" +
+ "etROCurrentVersionDirRequest\022D\n\025fetch_pa",
+ "rtition_files\030\024 \001(\0132%.voldemort.FetchPar" +
+ "titionFilesRequest\022@\n\023update_slop_entrie" +
+ "s\030\026 \001(\0132#.voldemort.UpdateSlopEntriesReq" +
+ "uest\022>\n\022failed_fetch_store\030\030 \001(\0132\".volde" +
+ "mort.FailedFetchStoreRequest\022C\n\025get_ro_s" +
+ "torage_format\030\031 \001(\0132$.voldemort.GetROSto" +
+ "rageFormatRequest\022F\n\026rebalance_state_cha" +
+ "nge\030\032 \001(\0132&.voldemort.RebalanceStateChan" +
+ "geRequest\022/\n\nrepair_job\030\033 \001(\0132\033.voldemor" +
+ "t.RepairJobRequest\022X\n initiate_rebalance",
+ "_node_on_donor\030\034 \001(\0132..voldemort.Initiat" +
+ "eRebalanceNodeOnDonorRequest\022Q\n\034delete_s" +
+ "tore_rebalance_state\030\035 \001(\0132+.voldemort.D" +
+ "eleteStoreRebalanceStateRequest\0225\n\rnativ" +
+ "e_backup\030\036 \001(\0132\036.voldemort.NativeBackupR" +
+ "equest\0227\n\016reserve_memory\030\037 \001(\0132\037.voldemo" +
+ "rt.ReserveMemoryRequest*\310\005\n\020AdminRequest" +
+ "Type\022\020\n\014GET_METADATA\020\000\022\023\n\017UPDATE_METADAT" +
+ "A\020\001\022\034\n\030UPDATE_PARTITION_ENTRIES\020\002\022\033\n\027FET" +
+ "CH_PARTITION_ENTRIES\020\003\022\034\n\030DELETE_PARTITI",
+ "ON_ENTRIES\020\004\022\035\n\031INITIATE_FETCH_AND_UPDAT" +
+ "E\020\005\022\032\n\026ASYNC_OPERATION_STATUS\020\006\022\033\n\027INITI" +
+ "ATE_REBALANCE_NODE\020\007\022\030\n\024ASYNC_OPERATION_" +
+ "STOP\020\010\022\030\n\024ASYNC_OPERATION_LIST\020\t\022\024\n\020TRUN" +
+ "CATE_ENTRIES\020\n\022\r\n\tADD_STORE\020\013\022\020\n\014DELETE_" +
+ "STORE\020\014\022\017\n\013FETCH_STORE\020\r\022\016\n\nSWAP_STORE\020\016" +
+ "\022\022\n\016ROLLBACK_STORE\020\017\022\032\n\026GET_RO_MAX_VERSI" +
+ "ON_DIR\020\020\022\036\n\032GET_RO_CURRENT_VERSION_DIR\020\021" +
+ "\022\031\n\025FETCH_PARTITION_FILES\020\022\022\027\n\023UPDATE_SL" +
+ "OP_ENTRIES\020\024\022\026\n\022FAILED_FETCH_STORE\020\026\022\031\n\025",
+ "GET_RO_STORAGE_FORMAT\020\027\022\032\n\026REBALANCE_STA" +
+ "TE_CHANGE\020\030\022\016\n\nREPAIR_JOB\020\031\022$\n INITIATE_" +
+ "REBALANCE_NODE_ON_DONOR\020\032\022 \n\034DELETE_STOR" +
+ "E_REBALANCE_STATE\020\033\022\021\n\rNATIVE_BACKUP\020\034\022\022" +
+ "\n\016RESERVE_MEMORY\020\035B-\n\034voldemort.client.p" +
+ "rotocol.pbB\013VAdminProtoH\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -23360,7 +23383,7 @@ public com.google.protobuf.ExtensionRegistry assignDescriptors(
internal_static_voldemort_UpdateMetadataRequest_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_voldemort_UpdateMetadataRequest_descriptor,
- new java.lang.String[] { "Key", "Versioned", },
+ new java.lang.String[] { "MetadataEntry", },
voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest.class,
voldemort.client.protocol.pb.VAdminProto.UpdateMetadataRequest.Builder.class);
internal_static_voldemort_UpdateMetadataResponse_descriptor =
@@ -23744,7 +23767,7 @@ public com.google.protobuf.ExtensionRegistry assignDescriptors(
internal_static_voldemort_RebalanceStateChangeRequest_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_voldemort_RebalanceStateChangeRequest_descriptor,
- new java.lang.String[] { "RebalancePartitionInfoList", "ClusterString", "SwapRo", "ChangeClusterMetadata", "ChangeRebalanceState", "Rollback", },
+ new java.lang.String[] { "RebalancePartitionInfoList", "ClusterString", "StoresString", "SwapRo", "ChangeClusterMetadata", "ChangeRebalanceState", "Rollback", },
voldemort.client.protocol.pb.VAdminProto.RebalanceStateChangeRequest.class,
voldemort.client.protocol.pb.VAdminProto.RebalanceStateChangeRequest.Builder.class);
internal_static_voldemort_RebalanceStateChangeResponse_descriptor =
diff --git a/src/java/voldemort/client/rebalance/RebalanceController.java b/src/java/voldemort/client/rebalance/RebalanceController.java
index fb52897487..b2789d5202 100644
--- a/src/java/voldemort/client/rebalance/RebalanceController.java
+++ b/src/java/voldemort/client/rebalance/RebalanceController.java
@@ -30,6 +30,7 @@
import voldemort.VoldemortException;
import voldemort.client.ClientConfig;
+import voldemort.client.protocol.RequestFormatType;
import voldemort.client.protocol.admin.AdminClient;
import voldemort.client.rebalance.task.DonorBasedRebalanceTask;
import voldemort.client.rebalance.task.RebalanceTask;
@@ -58,7 +59,9 @@ public class RebalanceController {
private final RebalanceClientConfig rebalanceConfig;
public RebalanceController(String bootstrapUrl, RebalanceClientConfig rebalanceConfig) {
- this.adminClient = new AdminClient(bootstrapUrl, rebalanceConfig, new ClientConfig());
+ this.adminClient = new AdminClient(bootstrapUrl,
+ rebalanceConfig,
+ new ClientConfig().setRequestFormatType(RequestFormatType.PROTOCOL_BUFFERS));
this.rebalanceConfig = rebalanceConfig;
}
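[Editorial note] The controller now pins the admin client to the protocol-buffers request format, since (per the comment added in getSocketStore) IGNORE_CHECKS only takes effect over protobuf. A minimal sketch of the same configuration in isolation; bootstrapUrl is a placeholder:

    // Illustrative only: force protobuf so that server-side checks can be bypassed during rebalance
    ClientConfig clientConfig = new ClientConfig().setRequestFormatType(RequestFormatType.PROTOCOL_BUFFERS);
    AdminClient adminClient = new AdminClient(bootstrapUrl, new RebalanceClientConfig(), clientConfig);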
@@ -386,6 +389,7 @@ private void rebalancePerPartitionTransition(final OrderedClusterTransition orde
// Flatten the node plans to partition plans
List rebalancePartitionPlanList = rebalancePartitionsInfoList;
+ List<StoreDefinition> allStoreDefs = orderedClusterTransition.getStoreDefs();
// Split the store definitions
List readOnlyStoreDefs = StoreDefinitionUtils.filterStores(orderedClusterTransition.getStoreDefs(),
true);
@@ -400,9 +404,14 @@ private void rebalancePerPartitionTransition(final OrderedClusterTransition orde
List filteredRebalancePartitionPlanList = RebalanceUtils.filterPartitionPlanWithStores(rebalancePartitionPlanList,
readOnlyStoreDefs);
+ // TODO this method right now takes just the source stores definition
+ // the 2nd argument needs to be fixed
+ // ATTENTION JAY
rebalanceStateChange(orderedClusterTransition.getId(),
orderedClusterTransition.getCurrentCluster(),
orderedClusterTransition.getTargetCluster(),
+ allStoreDefs,
+ allStoreDefs,
filteredRebalancePartitionPlanList,
hasReadOnlyStores,
hasReadWriteStores,
@@ -423,9 +432,14 @@ private void rebalancePerPartitionTransition(final OrderedClusterTransition orde
filteredRebalancePartitionPlanList = RebalanceUtils.filterPartitionPlanWithStores(rebalancePartitionPlanList,
readWriteStoreDefs);
+ // TODO this method right now takes just the source stores definition
+ // the 2nd argument needs to be fixed
+ // ATTENTION JAY
rebalanceStateChange(orderedClusterTransition.getId(),
orderedClusterTransition.getCurrentCluster(),
orderedClusterTransition.getTargetCluster(),
+ allStoreDefs,
+ allStoreDefs,
filteredRebalancePartitionPlanList,
hasReadOnlyStores,
hasReadWriteStores,
@@ -460,6 +474,8 @@ private void rebalancePerPartitionTransition(final OrderedClusterTransition orde
}
/**
+ * TODO JAY -- This interface expects the source stores definition and
+ * the target stores definition
*
* Perform a group of state change actions. Also any errors + rollback
* procedures are performed at this level itself.
@@ -490,6 +506,8 @@ private void rebalancePerPartitionTransition(final OrderedClusterTransition orde
private void rebalanceStateChange(final int taskId,
Cluster currentCluster,
Cluster transitionCluster,
+ List existingStoreDefs,
+ List targetStoreDefs,
List rebalancePartitionPlanList,
boolean hasReadOnlyStores,
boolean hasReadWriteStores,
@@ -511,6 +529,8 @@ private void rebalanceStateChange(final int taskId,
if(!rebalanceConfig.isShowPlanEnabled())
adminClient.rebalanceOps.rebalanceStateChange(currentCluster,
transitionCluster,
+ existingStoreDefs,
+ targetStoreDefs,
rebalancePartitionPlanList,
false,
true,
@@ -523,6 +543,8 @@ private void rebalanceStateChange(final int taskId,
if(!rebalanceConfig.isShowPlanEnabled())
adminClient.rebalanceOps.rebalanceStateChange(currentCluster,
transitionCluster,
+ existingStoreDefs,
+ targetStoreDefs,
rebalancePartitionPlanList,
false,
false,
@@ -535,6 +557,8 @@ private void rebalanceStateChange(final int taskId,
if(!rebalanceConfig.isShowPlanEnabled())
adminClient.rebalanceOps.rebalanceStateChange(currentCluster,
transitionCluster,
+ existingStoreDefs,
+ targetStoreDefs,
rebalancePartitionPlanList,
true,
true,
@@ -549,6 +573,8 @@ private void rebalanceStateChange(final int taskId,
if(!rebalanceConfig.isShowPlanEnabled())
adminClient.rebalanceOps.rebalanceStateChange(currentCluster,
transitionCluster,
+ existingStoreDefs,
+ targetStoreDefs,
rebalancePartitionPlanList,
true,
true,
@@ -676,8 +702,15 @@ private void rebalancePerTaskTransition(final int taskId,
if(hasReadOnlyStores && hasReadWriteStores && finishedReadOnlyStores) {
// Case 0
- adminClient.rebalanceOps.rebalanceStateChange(null,
- currentCluster,
+
+ // TODO this method right now takes just the source stores
+ // definition
+ // the 2nd argument needs to be fixed
+ // ATTENTION JAY
+ adminClient.rebalanceOps.rebalanceStateChange(null, currentCluster, null, null, // pass
+ // current
+ // store
+ // def
null,
true,
true,
@@ -686,8 +719,15 @@ private void rebalancePerTaskTransition(final int taskId,
false);
} else if(hasReadWriteStores && finishedReadOnlyStores) {
// Case 4
- adminClient.rebalanceOps.rebalanceStateChange(null,
- currentCluster,
+
+ // TODO this method right now takes just the source stores
+ // definition
+ // the 2nd argument needs to be fixed
+ // ATTENTION JAY
+ adminClient.rebalanceOps.rebalanceStateChange(null, currentCluster, null, null, // pass
+ // current
+ // store
+ // def
null,
false,
true,
diff --git a/src/java/voldemort/client/rebalance/RebalancePartitionsInfo.java b/src/java/voldemort/client/rebalance/RebalancePartitionsInfo.java
index 330a81d021..beb69ef3c0 100644
--- a/src/java/voldemort/client/rebalance/RebalancePartitionsInfo.java
+++ b/src/java/voldemort/client/rebalance/RebalancePartitionsInfo.java
@@ -131,12 +131,16 @@ public static RebalancePartitionsInfo create(Map<?, ?> map) {
List partitionList = Utils.uncheckedCast(map.get(unbalancedStore
+ "replicaToAddPartitionList"
+ Integer.toString(replicaNo)));
+ // TODO there is a potential NPE hiding here that might fail
+ // rebalancing tests
if(partitionList.size() > 0)
replicaToAddPartition.put(replicaNo, partitionList);
List deletePartitionList = Utils.uncheckedCast(map.get(unbalancedStore
+ "replicaToDeletePartitionList"
+ Integer.toString(replicaNo)));
+ // TODO there is a potential NPE hiding here that might fail
+ // rebalancing tests
if(deletePartitionList.size() > 0)
replicaToDeletePartitionList.put(replicaNo, deletePartitionList);
}
@@ -156,7 +160,7 @@ public static RebalancePartitionsInfo create(Map<?, ?> map) {
attempt);
}
- public ImmutableMap asMap() {
+ public synchronized ImmutableMap asMap() {
ImmutableMap.Builder builder = new ImmutableMap.Builder();
builder.put("stealerId", stealerId)
@@ -199,23 +203,23 @@ public ImmutableMap asMap() {
return builder.build();
}
- public void setAttempt(int attempt) {
+ public synchronized void setAttempt(int attempt) {
this.attempt = attempt;
}
- public int getDonorId() {
+ public synchronized int getDonorId() {
return donorId;
}
- public int getAttempt() {
+ public synchronized int getAttempt() {
return attempt;
}
- public int getStealerId() {
+ public synchronized int getStealerId() {
return stealerId;
}
- public Cluster getInitialCluster() {
+ public synchronized Cluster getInitialCluster() {
return initialCluster;
}
@@ -225,35 +229,35 @@ public Cluster getInitialCluster() {
*
* @return Set of store names
*/
- public Set getUnbalancedStoreList() {
+ public synchronized Set getUnbalancedStoreList() {
return storeToReplicaToAddPartitionList.keySet();
}
- public HashMap<String, HashMap<Integer, List<Integer>>> getStoreToReplicaToAddPartitionList() {
+ public synchronized HashMap<String, HashMap<Integer, List<Integer>>> getStoreToReplicaToAddPartitionList() {
return this.storeToReplicaToAddPartitionList;
}
- public HashMap<String, HashMap<Integer, List<Integer>>> getStoreToReplicaToDeletePartitionList() {
+ public synchronized HashMap<String, HashMap<Integer, List<Integer>>> getStoreToReplicaToDeletePartitionList() {
return this.storeToReplicaToDeletePartitionList;
}
- public HashMap<Integer, List<Integer>> getReplicaToAddPartitionList(String storeName) {
+ public synchronized HashMap<Integer, List<Integer>> getReplicaToAddPartitionList(String storeName) {
return this.storeToReplicaToAddPartitionList.get(storeName);
}
- public HashMap<Integer, List<Integer>> getReplicaToDeletePartitionList(String storeName) {
+ public synchronized HashMap<Integer, List<Integer>> getReplicaToDeletePartitionList(String storeName) {
return this.storeToReplicaToDeletePartitionList.get(storeName);
}
- public void setStoreToReplicaToAddPartitionList(HashMap<String, HashMap<Integer, List<Integer>>> storeToReplicaToAddPartitionList) {
+ public synchronized void setStoreToReplicaToAddPartitionList(HashMap<String, HashMap<Integer, List<Integer>>> storeToReplicaToAddPartitionList) {
this.storeToReplicaToAddPartitionList = storeToReplicaToAddPartitionList;
}
- public void setStoreToReplicaToDeletePartitionList(HashMap<String, HashMap<Integer, List<Integer>>> storeToReplicaToDeletePartitionList) {
+ public synchronized void setStoreToReplicaToDeletePartitionList(HashMap<String, HashMap<Integer, List<Integer>>> storeToReplicaToDeletePartitionList) {
this.storeToReplicaToDeletePartitionList = storeToReplicaToDeletePartitionList;
}
- public void removeStore(String storeName) {
+ public synchronized void removeStore(String storeName) {
this.storeToReplicaToAddPartitionList.remove(storeName);
this.storeToReplicaToDeletePartitionList.remove(storeName);
}
@@ -263,7 +267,7 @@ public void removeStore(String storeName) {
*
* @return List of primary partitions
*/
- public List getStealMasterPartitions() {
+ public synchronized List getStealMasterPartitions() {
Iterator<HashMap<Integer, List<Integer>>> iter = storeToReplicaToAddPartitionList.values()
.iterator();
List primaryPartitionsBeingMoved = Lists.newArrayList();
@@ -276,7 +280,7 @@ public List getStealMasterPartitions() {
}
@Override
- public String toString() {
+ public synchronized String toString() {
StringBuffer sb = new StringBuffer();
sb.append("\nRebalancePartitionsInfo(" + getStealerId() + " ["
+ initialCluster.getNodeById(getStealerId()).getHost() + "] <--- " + getDonorId()
@@ -307,7 +311,7 @@ public String toString() {
return sb.toString();
}
- public String toJsonString() {
+ public synchronized String toJsonString() {
Map map = asMap();
StringWriter writer = new StringWriter();
@@ -317,7 +321,7 @@ public String toJsonString() {
}
@Override
- public boolean equals(Object o) {
+ public synchronized boolean equals(Object o) {
if(this == o)
return true;
if(o == null || getClass() != o.getClass())
@@ -344,7 +348,7 @@ public boolean equals(Object o) {
}
@Override
- public int hashCode() {
+ public synchronized int hashCode() {
int result = stealerId;
result = 31 * result + donorId;
result = 31 * result + initialCluster.hashCode();
diff --git a/src/java/voldemort/cluster/Zone.java b/src/java/voldemort/cluster/Zone.java
index 047aa1f35a..874db6e876 100644
--- a/src/java/voldemort/cluster/Zone.java
+++ b/src/java/voldemort/cluster/Zone.java
@@ -18,7 +18,7 @@ public Zone(int zoneId, LinkedList proximityList) {
}
public Zone() {
- this.zoneId = 0;
+ this.zoneId = DEFAULT_ZONE_ID;
this.proximityList = new LinkedList();
}
diff --git a/src/java/voldemort/coordinator/CoordinatorConfig.java b/src/java/voldemort/coordinator/CoordinatorConfig.java
index 670f278b6d..2f2b32f1b1 100644
--- a/src/java/voldemort/coordinator/CoordinatorConfig.java
+++ b/src/java/voldemort/coordinator/CoordinatorConfig.java
@@ -1,3 +1,19 @@
+/*
+ * Copyright 2013 LinkedIn, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
package voldemort.coordinator;
import java.io.BufferedInputStream;
@@ -18,18 +34,16 @@ public class CoordinatorConfig {
private volatile List bootstrapURLs = null;
private volatile String fatClientConfigPath = null;
- private volatile int fatClientWrapperMaxPoolSize = 20;
- private volatile int fatClientWrapperCorePoolSize = 20;
- private volatile int fatClientWrapperKeepAliveInSecs = 60;
private volatile int metadataCheckIntervalInMs = 5000;
+ private volatile int nettyServerPort = 8080;
+ private volatile int nettyServerBacklog = 1000;
/* Property names for property-based configuration */
public static final String BOOTSTRAP_URLS_PROPERTY = "bootstrap_urls";
public static final String FAT_CLIENTS_CONFIG_FILE_PATH_PROPERTY = "fat_clients_config_file_path";
- public static final String FAT_CLIENT_WRAPPER_MAX_POOL_SIZE_PROPERTY = "fat_client_wrapper_max_pool_size";
- public static final String FAT_CLIENT_WRAPPER_CORE_POOL_SIZE_PROPERTY = "fat_client_wrapper_core_pool_size";
- public static final String FAT_CLIENT_WRAPPER_POOL_KEEPALIVE_IN_SECS = "fat_client_wrapper_pool_keepalive_in_secs";
public static final String METADATA_CHECK_INTERVAL_IN_MS = "metadata_check_interval_in_ms";
+ public static final String NETTY_SERVER_PORT = "netty_server_port";
+ public static final String NETTY_SERVER_BACKLOG = "netty_server_backlog";
/**
* Instantiate the coordinator config using a properties file
@@ -61,6 +75,17 @@ public CoordinatorConfig(Properties properties) {
setProperties(properties);
}
+ /**
+ * Dummy constructor for testing purposes
+ */
+ public CoordinatorConfig() {}
+
+ /**
+ * Set the values using the specified Properties object
+ *
+ * @param properties Properties object containing specific property values
+ * for the Coordinator config
+ */
private void setProperties(Properties properties) {
Props props = new Props(properties);
if(props.containsKey(BOOTSTRAP_URLS_PROPERTY)) {
@@ -71,24 +96,17 @@ private void setProperties(Properties properties) {
setFatClientConfigPath(props.getString(FAT_CLIENTS_CONFIG_FILE_PATH_PROPERTY));
}
- if(props.containsKey(FAT_CLIENT_WRAPPER_CORE_POOL_SIZE_PROPERTY)) {
- setFatClientWrapperCorePoolSize(props.getInt(FAT_CLIENT_WRAPPER_CORE_POOL_SIZE_PROPERTY,
- this.fatClientWrapperCorePoolSize));
- }
-
- if(props.containsKey(FAT_CLIENT_WRAPPER_MAX_POOL_SIZE_PROPERTY)) {
- setFatClientWrapperMaxPoolSize(props.getInt(FAT_CLIENT_WRAPPER_MAX_POOL_SIZE_PROPERTY,
- this.fatClientWrapperMaxPoolSize));
+ if(props.containsKey(METADATA_CHECK_INTERVAL_IN_MS)) {
+ setMetadataCheckIntervalInMs(props.getInt(METADATA_CHECK_INTERVAL_IN_MS,
+ this.metadataCheckIntervalInMs));
}
- if(props.containsKey(FAT_CLIENT_WRAPPER_POOL_KEEPALIVE_IN_SECS)) {
- setFatClientWrapperKeepAliveInSecs(props.getInt(FAT_CLIENT_WRAPPER_POOL_KEEPALIVE_IN_SECS,
- this.fatClientWrapperKeepAliveInSecs));
+ if(props.containsKey(NETTY_SERVER_PORT)) {
+ setServerPort(props.getInt(NETTY_SERVER_PORT, this.nettyServerPort));
}
- if(props.containsKey(METADATA_CHECK_INTERVAL_IN_MS)) {
- setMetadataCheckIntervalInMs(props.getInt(METADATA_CHECK_INTERVAL_IN_MS,
- this.metadataCheckIntervalInMs));
+ if(props.containsKey(NETTY_SERVER_BACKLOG)) {
+ setNettyServerBacklog(props.getInt(NETTY_SERVER_BACKLOG, this.nettyServerBacklog));
}
}
@@ -98,6 +116,14 @@ public String[] getBootstrapURLs() {
return this.bootstrapURLs.toArray(new String[this.bootstrapURLs.size()]);
}
+ /**
+ * Sets the bootstrap URLs used by the different Fat clients inside the
+ * Coordinator
+ *
+ * @param bootstrapUrls list of bootstrap URLs defining which cluster to
+ * connect to
+ * @return The current CoordinatorConfig object, to allow chained invocation
+ */
public CoordinatorConfig setBootstrapURLs(List bootstrapUrls) {
this.bootstrapURLs = Utils.notNull(bootstrapUrls);
if(this.bootstrapURLs.size() <= 0)
@@ -109,40 +135,52 @@ public String getFatClientConfigPath() {
return fatClientConfigPath;
}
+ /**
+ * Defines individual config for each of the fat clients managed by the
+ * Coordinator
+ *
+ * @param fatClientConfigPath The path of the file containing the fat client
+ * config in Avro format
+ */
public void setFatClientConfigPath(String fatClientConfigPath) {
this.fatClientConfigPath = fatClientConfigPath;
}
- public int getFatClientWrapperMaxPoolSize() {
- return fatClientWrapperMaxPoolSize;
- }
-
- public void setFatClientWrapperMaxPoolSize(int fatClientWrapperMaxPoolSize) {
- this.fatClientWrapperMaxPoolSize = fatClientWrapperMaxPoolSize;
- }
-
- public int getFatClientWrapperCorePoolSize() {
- return fatClientWrapperCorePoolSize;
+ public int getMetadataCheckIntervalInMs() {
+ return metadataCheckIntervalInMs;
}
- public void setFatClientWrapperCorePoolSize(int fatClientWrapperCorePoolSize) {
- this.fatClientWrapperCorePoolSize = fatClientWrapperCorePoolSize;
+ /**
+ * @param metadataCheckIntervalInMs Defines the frequency with which to
+ * check for updates in the cluster metadata (Eg: cluster.xml and
+ * stores.xml)
+ */
+ public void setMetadataCheckIntervalInMs(int metadataCheckIntervalInMs) {
+ this.metadataCheckIntervalInMs = metadataCheckIntervalInMs;
}
- public int getFatClientWrapperKeepAliveInSecs() {
- return fatClientWrapperKeepAliveInSecs;
+ public int getServerPort() {
+ return nettyServerPort;
}
- public void setFatClientWrapperKeepAliveInSecs(int fatClientWrapperKeepAliveInSecs) {
- this.fatClientWrapperKeepAliveInSecs = fatClientWrapperKeepAliveInSecs;
+ /**
+ * @param serverPort Defines the port to use while bootstrapping the Netty
+ * server
+ */
+ public void setServerPort(int serverPort) {
+ this.nettyServerPort = serverPort;
}
- public int getMetadataCheckIntervalInMs() {
- return metadataCheckIntervalInMs;
+ public int getNettyServerBacklog() {
+ return nettyServerBacklog;
}
- public void setMetadataCheckIntervalInMs(int metadataCheckIntervalInMs) {
- this.metadataCheckIntervalInMs = metadataCheckIntervalInMs;
+ /**
+ * @param nettyServerBacklog Defines the netty server backlog value
+ *
+ */
+ public void setNettyServerBacklog(int nettyServerBacklog) {
+ this.nettyServerBacklog = nettyServerBacklog;
}
}
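For context, a minimal sketch (illustrative only, not part of the patch) of how the coordinator picks up the new property names declared above; the property values and the fat client config path are placeholders.

import java.util.Properties;

import voldemort.coordinator.CoordinatorConfig;

public class CoordinatorConfigSketch {

    public static void main(String[] args) {
        // Property keys match the constants declared in CoordinatorConfig;
        // the values below are illustrative placeholders.
        Properties props = new Properties();
        props.setProperty(CoordinatorConfig.BOOTSTRAP_URLS_PROPERTY, "tcp://localhost:6666");
        props.setProperty(CoordinatorConfig.FAT_CLIENTS_CONFIG_FILE_PATH_PROPERTY, "config/fat_client_config.avro");
        props.setProperty(CoordinatorConfig.METADATA_CHECK_INTERVAL_IN_MS, "5000");
        props.setProperty(CoordinatorConfig.NETTY_SERVER_PORT, "8080");
        props.setProperty(CoordinatorConfig.NETTY_SERVER_BACKLOG, "1000");

        CoordinatorConfig config = new CoordinatorConfig(props);
        System.out.println("Netty port     : " + config.getServerPort());
        System.out.println("Netty backlog  : " + config.getNettyServerBacklog());
        System.out.println("Metadata check : " + config.getMetadataCheckIntervalInMs() + " ms");
    }
}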
diff --git a/src/java/voldemort/coordinator/CoordinatorErrorStats.java b/src/java/voldemort/coordinator/CoordinatorErrorStats.java
new file mode 100644
index 0000000000..96a55ca917
--- /dev/null
+++ b/src/java/voldemort/coordinator/CoordinatorErrorStats.java
@@ -0,0 +1,52 @@
+package voldemort.coordinator;
+
+import java.util.concurrent.RejectedExecutionException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import voldemort.VoldemortException;
+import voldemort.annotations.jmx.JmxGetter;
+import voldemort.store.InsufficientOperationalNodesException;
+import voldemort.store.InsufficientZoneResponsesException;
+import voldemort.store.InvalidMetadataException;
+import voldemort.store.routed.PipelineRoutedStats;
+
+/**
+ * Class to keep track of all the errors in the Coordinator service
+ *
+ */
+public class CoordinatorErrorStats extends PipelineRoutedStats {
+
+ CoordinatorErrorStats() {
+ super();
+ this.errCountMap.put(RejectedExecutionException.class, new AtomicLong(0));
+ this.errCountMap.put(IllegalArgumentException.class, new AtomicLong(0));
+ this.errCountMap.put(VoldemortException.class, new AtomicLong(0));
+ }
+
+ @Override
+ public boolean isSevere(Exception ve) {
+ if(ve instanceof InsufficientOperationalNodesException
+ || ve instanceof InsufficientZoneResponsesException
+ || ve instanceof InvalidMetadataException || ve instanceof RejectedExecutionException
+ || ve instanceof IllegalArgumentException || ve instanceof VoldemortException)
+ return true;
+ else
+ return false;
+ }
+
+ @JmxGetter(name = "numRejectedExecutionExceptions", description = "Number of rejected tasks by the Fat client")
+ public long getNumRejectedExecutionExceptions() {
+ return errCountMap.get(RejectedExecutionException.class).get();
+ }
+
+ @JmxGetter(name = "numIllegalArgumentExceptions", description = "Number of bad requests received by the Coordinator")
+ public long getNumIllegalArgumentExceptions() {
+ return errCountMap.get(IllegalArgumentException.class).get();
+ }
+
+ @JmxGetter(name = "numVoldemortExceptions", description = "Number of failed Voldemort operations")
+ public long getNumVoldemortExceptions() {
+ return errCountMap.get(VoldemortException.class).get();
+ }
+
+}
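A minimal usage sketch for the new stats class (illustrative, not part of the patch). It assumes the reportException(Exception) method inherited from PipelineRoutedStats, which is how FatClientWrapper reports rejected tasks later in this change; the sketch lives in the voldemort.coordinator package because the constructor is package-private.

package voldemort.coordinator;

import java.util.concurrent.RejectedExecutionException;

import voldemort.VoldemortException;

public class CoordinatorErrorStatsSketch {

    public static void main(String[] args) {
        CoordinatorErrorStats stats = new CoordinatorErrorStats();

        // Record a couple of failures, the same way handleRejectedException() does
        stats.reportException(new RejectedExecutionException("fat client queue full"));
        stats.reportException(new VoldemortException("put failed"));

        // These counters back the JMX getters defined above
        System.out.println("Rejected tasks       : " + stats.getNumRejectedExecutionExceptions());
        System.out.println("Voldemort exceptions : " + stats.getNumVoldemortExceptions());
        System.out.println("Severe?              : " + stats.isSevere(new IllegalArgumentException("bad key")));
    }
}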
diff --git a/src/java/voldemort/coordinator/CoordinatorPipelineFactory.java b/src/java/voldemort/coordinator/CoordinatorPipelineFactory.java
index 1646f02130..0252539a57 100644
--- a/src/java/voldemort/coordinator/CoordinatorPipelineFactory.java
+++ b/src/java/voldemort/coordinator/CoordinatorPipelineFactory.java
@@ -36,9 +36,13 @@ public class CoordinatorPipelineFactory implements ChannelPipelineFactory {
private boolean noop = false;
private Map fatClientMap;
+ private CoordinatorErrorStats errorStats = null;
- public CoordinatorPipelineFactory(Map fatClientMap, boolean noop) {
+ public CoordinatorPipelineFactory(Map fatClientMap,
+ CoordinatorErrorStats errorStats,
+ boolean noop) {
this.fatClientMap = fatClientMap;
+ this.errorStats = errorStats;
this.noop = noop;
}
@@ -56,7 +60,8 @@ public ChannelPipeline getPipeline() throws Exception {
if(this.noop) {
pipeline.addLast("handler", new NoopHttpRequestHandler());
} else {
- pipeline.addLast("handler", new VoldemortHttpRequestHandler(this.fatClientMap));
+ pipeline.addLast("handler", new VoldemortHttpRequestHandler(this.fatClientMap,
+ this.errorStats));
}
return pipeline;
}
diff --git a/src/java/voldemort/coordinator/CoordinatorService.java b/src/java/voldemort/coordinator/CoordinatorService.java
index a6a8a508bc..bede057a2e 100644
--- a/src/java/voldemort/coordinator/CoordinatorService.java
+++ b/src/java/voldemort/coordinator/CoordinatorService.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -31,6 +31,7 @@
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadPoolExecutor;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
@@ -40,8 +41,11 @@
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.jboss.netty.bootstrap.ServerBootstrap;
+import org.jboss.netty.channel.Channel;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
+import voldemort.annotations.jmx.JmxGetter;
+import voldemort.annotations.jmx.JmxManaged;
import voldemort.client.ClientConfig;
import voldemort.client.SocketStoreClientFactory;
import voldemort.client.SystemStoreRepository;
@@ -52,6 +56,9 @@
import voldemort.server.VoldemortServer;
import voldemort.store.StoreDefinition;
import voldemort.store.metadata.MetadataStore;
+import voldemort.store.stats.StoreStats;
+import voldemort.store.stats.Tracked;
+import voldemort.utils.JmxUtils;
import voldemort.utils.SystemTime;
import voldemort.utils.Utils;
import voldemort.xml.StoreDefinitionsMapper;
@@ -63,24 +70,33 @@
* clients and invokes the corresponding Fat client API.
*
*/
+@JmxManaged(description = "A Coordinator Service for proxying Voldemort HTTP requests")
public class CoordinatorService extends AbstractService {
- private CoordinatorConfig config = null;
+ private CoordinatorConfig coordinatorConfig = null;
- public CoordinatorService(CoordinatorConfig config) {
- super(ServiceType.COORDINATOR);
- this.config = config;
- }
-
- private static boolean noop = false;
- private static SocketStoreClientFactory storeClientFactory = null;
- private static AsyncMetadataVersionManager asyncMetadataManager = null;
- private static SchedulerService schedulerService = null;
+ private boolean noop = false;
+ private SocketStoreClientFactory storeClientFactory = null;
+ private AsyncMetadataVersionManager asyncMetadataManager = null;
+ private SchedulerService schedulerService = null;
private static final Logger logger = Logger.getLogger(CoordinatorService.class);
- private static Map fatClientMap = null;
+ private Map fatClientMap = null;
public final static Schema CLIENT_CONFIGS_AVRO_SCHEMA = Schema.parse("{ \"name\": \"clientConfigs\", \"type\":\"array\","
+ "\"items\": { \"name\": \"clientConfig\", \"type\": \"map\", \"values\":\"string\" }}}");
private static final String STORE_NAME_KEY = "store_name";
+ protected ThreadPoolExecutor workerPool = null;
+ private final CoordinatorErrorStats errorStats;
+ private final StoreStats coordinatorPerfStats;
+ private ServerBootstrap bootstrap = null;
+ private Channel nettyServerChannel = null;
+
+ public CoordinatorService(CoordinatorConfig config) {
+ super(ServiceType.COORDINATOR);
+ this.coordinatorConfig = config;
+ this.coordinatorPerfStats = new StoreStats();
+ this.errorStats = new CoordinatorErrorStats();
+ RESTErrorHandler.setErrorStatsHandler(errorStats);
+ }
/**
* Initializes all the Fat clients (1 per store) for the cluster that this
@@ -97,8 +113,8 @@ private void initializeFatClients() {
List storeDefList = storeMapper.readStoreList(new StringReader(storesXml),
false);
- Map fatClientConfigMap = readClientConfig(this.config.getFatClientConfigPath(),
- this.config.getBootstrapURLs());
+ Map fatClientConfigMap = readClientConfig(this.coordinatorConfig.getFatClientConfigPath(),
+ this.coordinatorConfig.getBootstrapURLs());
// For now Simply create the map of store definition to
// FatClientWrappers
// TODO: After the fat client improvements is done, modify this to
@@ -111,10 +127,12 @@ private void initializeFatClients() {
logger.info("Creating a Fat client wrapper for store: " + storeName);
logger.info("Using config: " + fatClientConfigMap.get(storeName));
fatClientMap.put(storeName, new FatClientWrapper(storeName,
- this.config,
+ this.coordinatorConfig,
fatClientConfigMap.get(storeName),
storesXml,
- clusterXml));
+ clusterXml,
+ this.errorStats,
+ this.coordinatorPerfStats));
}
}
@@ -123,7 +141,7 @@ protected void startInner() {
// Initialize the Voldemort Metadata
ClientConfig clientConfig = new ClientConfig();
- clientConfig.setBootstrapUrls(this.config.getBootstrapURLs());
+ clientConfig.setBootstrapUrls(this.coordinatorConfig.getBootstrapURLs());
storeClientFactory = new SocketStoreClientFactory(clientConfig);
initializeFatClients();
@@ -145,6 +163,8 @@ public Void call() throws Exception {
};
+ // For now track changes in cluster.xml only
+ // TODO: Modify this to track stores.xml in the future
asyncMetadataManager = new AsyncMetadataVersionManager(sysRepository,
rebootstrapCallback,
null);
@@ -153,18 +173,37 @@ public Void call() throws Exception {
schedulerService.schedule(asyncMetadataManager.getClass().getName(),
asyncMetadataManager,
new Date(),
- this.config.getMetadataCheckIntervalInMs());
+ this.coordinatorConfig.getMetadataCheckIntervalInMs());
// Configure the server.
- ServerBootstrap bootstrap = new ServerBootstrap(new NioServerSocketChannelFactory(Executors.newCachedThreadPool(),
- Executors.newCachedThreadPool()));
- bootstrap.setOption("backlog", 1000);
+ this.workerPool = (ThreadPoolExecutor) Executors.newCachedThreadPool();
+ this.bootstrap = new ServerBootstrap(new NioServerSocketChannelFactory(Executors.newCachedThreadPool(),
+ workerPool));
+ this.bootstrap.setOption("backlog", this.coordinatorConfig.getNettyServerBacklog());
+ this.bootstrap.setOption("child.tcpNoDelay", true);
+ this.bootstrap.setOption("child.keepAlive", true);
+ this.bootstrap.setOption("child.reuseAddress", true);
// Set up the event pipeline factory.
- bootstrap.setPipelineFactory(new CoordinatorPipelineFactory(fatClientMap, noop));
+ this.bootstrap.setPipelineFactory(new CoordinatorPipelineFactory(this.fatClientMap,
+ this.errorStats,
+ noop));
+
+ // Register the Mbean
+ // Netty Queue stats
+ JmxUtils.registerMbean(this,
+ JmxUtils.createObjectName(JmxUtils.getPackageName(this.getClass()),
+ JmxUtils.getClassName(this.getClass())));
+
+ // Error stats Mbean
+ JmxUtils.registerMbean(this.errorStats,
+ JmxUtils.createObjectName(JmxUtils.getPackageName(this.errorStats.getClass()),
+ JmxUtils.getClassName(this.errorStats.getClass())));
// Bind and start to accept incoming connections.
- bootstrap.bind(new InetSocketAddress(8080));
+ this.nettyServerChannel = this.bootstrap.bind(new InetSocketAddress(this.coordinatorConfig.getServerPort()));
+
+ logger.info("Coordinator service started on port " + this.coordinatorConfig.getServerPort());
}
/**
@@ -205,14 +244,14 @@ private static Map readClientConfig(String configFilePath,
throw new Exception("Illegal Store Name !!!");
}
- ClientConfig config = new ClientConfig(props);
- config.setBootstrapUrls(bootstrapURLs)
- .setEnableCompressionLayer(false)
- .setEnableSerializationLayer(false)
- .enableDefaultClient(true)
- .setEnableLazy(false);
+ ClientConfig fatClientConfig = new ClientConfig(props);
+ fatClientConfig.setBootstrapUrls(bootstrapURLs)
+ .setEnableCompressionLayer(false)
+ .setEnableSerializationLayer(false)
+ .enableDefaultClient(true)
+ .setEnableLazy(false);
- storeNameConfigMap.put(storeName, config);
+ storeNameConfigMap.put(storeName, fatClientConfig);
}
}
@@ -231,7 +270,17 @@ private static Map readClientConfig(String configFilePath,
}
@Override
- protected void stopInner() {}
+ protected void stopInner() {
+ if(this.nettyServerChannel != null) {
+ this.nettyServerChannel.close();
+ }
+
+ JmxUtils.unregisterMbean(JmxUtils.createObjectName(JmxUtils.getPackageName(this.getClass()),
+ JmxUtils.getClassName(this.getClass())));
+
+ JmxUtils.unregisterMbean(JmxUtils.createObjectName(JmxUtils.getPackageName(this.errorStats.getClass()),
+ JmxUtils.getClassName(this.errorStats.getClass())));
+ }
public static void main(String[] args) throws Exception {
CoordinatorConfig config = null;
@@ -264,4 +313,59 @@ public void run() {
}
});
}
+
+ @JmxGetter(name = "numberOfActiveThreads", description = "The number of active Netty worker threads.")
+ public int getNumberOfActiveThreads() {
+ return this.workerPool.getActiveCount();
+ }
+
+ @JmxGetter(name = "numberOfThreads", description = "The total number of Netty worker threads, active and idle.")
+ public int getNumberOfThreads() {
+ return this.workerPool.getPoolSize();
+ }
+
+ @JmxGetter(name = "queuedRequests", description = "Number of requests in the Netty worker queue waiting to execute.")
+ public int getQueuedRequests() {
+ return this.workerPool.getQueue().size();
+ }
+
+ @JmxGetter(name = "averageGetCompletionTimeInMs", description = "The avg. time in ms for GET calls to complete.")
+ public double getAverageGetCompletionTimeInMs() {
+ return this.coordinatorPerfStats.getAvgTimeInMs(Tracked.GET);
+ }
+
+ @JmxGetter(name = "averagePutCompletionTimeInMs", description = "The avg. time in ms for GET calls to complete.")
+ public double getAveragePutCompletionTimeInMs() {
+ return this.coordinatorPerfStats.getAvgTimeInMs(Tracked.PUT);
+ }
+
+ @JmxGetter(name = "averageGetAllCompletionTimeInMs", description = "The avg. time in ms for GET calls to complete.")
+ public double getAverageGetAllCompletionTimeInMs() {
+ return this.coordinatorPerfStats.getAvgTimeInMs(Tracked.GET_ALL);
+ }
+
+ @JmxGetter(name = "averageDeleteCompletionTimeInMs", description = "The avg. time in ms for GET calls to complete.")
+ public double getAverageDeleteCompletionTimeInMs() {
+ return this.coordinatorPerfStats.getAvgTimeInMs(Tracked.DELETE);
+ }
+
+ @JmxGetter(name = "q99GetLatencyInMs", description = "")
+ public long getQ99GetLatency() {
+ return this.coordinatorPerfStats.getQ99LatencyInMs(Tracked.GET);
+ }
+
+ @JmxGetter(name = "q99PutLatencyInMs", description = "")
+ public long getQ99PutLatency() {
+ return this.coordinatorPerfStats.getQ99LatencyInMs(Tracked.PUT);
+ }
+
+ @JmxGetter(name = "q99GetAllLatencyInMs", description = "")
+ public long getQ99GetAllLatency() {
+ return this.coordinatorPerfStats.getQ99LatencyInMs(Tracked.GET_ALL);
+ }
+
+ @JmxGetter(name = "q99DeleteLatencyInMs", description = "")
+ public long getQ99DeleteLatency() {
+ return this.coordinatorPerfStats.getQ99LatencyInMs(Tracked.DELETE);
+ }
}
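The latency getters above simply read a shared StoreStats instance that the HTTP request executors populate via recordTime() once a response is written (see the executor changes later in this patch). A minimal sketch of that record/read cycle (illustrative only), outside of any Netty plumbing:

import voldemort.store.stats.StoreStats;
import voldemort.store.stats.Tracked;

public class CoordinatorPerfStatsSketch {

    public static void main(String[] args) throws InterruptedException {
        StoreStats perfStats = new StoreStats();

        // Time a pretend GET the same way HttpGetRequestExecutor.writeResponse() does
        long startTimestampInNs = System.nanoTime();
        Thread.sleep(5); // stand-in for the fat client call
        perfStats.recordTime(Tracked.GET, System.nanoTime() - startTimestampInNs);

        // These are the numbers surfaced through the coordinator's JMX getters
        System.out.println("avg GET time (ms): " + perfStats.getAvgTimeInMs(Tracked.GET));
        System.out.println("q99 GET time (ms): " + perfStats.getQ99LatencyInMs(Tracked.GET));
    }
}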
diff --git a/src/java/voldemort/coordinator/CoordinatorUtils.java b/src/java/voldemort/coordinator/CoordinatorUtils.java
new file mode 100644
index 0000000000..d8003ca655
--- /dev/null
+++ b/src/java/voldemort/coordinator/CoordinatorUtils.java
@@ -0,0 +1,46 @@
+package voldemort.coordinator;
+
+import org.codehaus.jackson.map.ObjectMapper;
+
+import voldemort.versioning.VectorClock;
+
+public class CoordinatorUtils {
+
+ /**
+ * Function to serialize the given Vector clock into a string. If something
+ * goes wrong, it returns an empty string.
+ *
+ * @param vc The Vector clock to serialize
+ * @return The string (JSON) version of the specified Vector clock
+ */
+ public static String getSerializedVectorClock(VectorClock vc) {
+ VectorClockWrapper vcWrapper = new VectorClockWrapper(vc);
+ ObjectMapper mapper = new ObjectMapper();
+ String serializedVC = "";
+ try {
+ serializedVC = mapper.writeValueAsString(vcWrapper);
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ return serializedVC;
+ }
+
+ public static VectorClock deserializeVectorClock(String serializedVC) {
+ VectorClock vc = null;
+
+ if(serializedVC == null) {
+ return null;
+ }
+
+ ObjectMapper mapper = new ObjectMapper();
+
+ try {
+ VectorClockWrapper vcWrapper = mapper.readValue(serializedVC, VectorClockWrapper.class);
+ vc = new VectorClock(vcWrapper.getVersions(), vcWrapper.getTimestamp());
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ return vc;
+ }
+}
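A round-trip sketch for the helpers above (illustrative only): the serialized form is the JSON rendering of VectorClockWrapper, which is the same string the GET executor later in this patch uses as the ETag header.

import voldemort.coordinator.CoordinatorUtils;
import voldemort.versioning.VectorClock;

public class VectorClockSerializationSketch {

    public static void main(String[] args) {
        VectorClock original = new VectorClock();

        // Serialize to the JSON form used as the ETag by the coordinator
        String serialized = CoordinatorUtils.getSerializedVectorClock(original);
        System.out.println("Serialized vector clock: " + serialized);

        // ... and turn the string back into a VectorClock
        VectorClock roundTripped = CoordinatorUtils.deserializeVectorClock(serialized);
        System.out.println("Round trip equal: " + original.equals(roundTripped));
    }
}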
diff --git a/src/java/voldemort/coordinator/DynamicTimeoutStoreClient.java b/src/java/voldemort/coordinator/DynamicTimeoutStoreClient.java
index 17f3d71af5..2c72b3e62a 100644
--- a/src/java/voldemort/coordinator/DynamicTimeoutStoreClient.java
+++ b/src/java/voldemort/coordinator/DynamicTimeoutStoreClient.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -29,6 +29,7 @@
import voldemort.store.CompositeVersionedPutVoldemortRequest;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.store.InvalidMetadataException;
+import voldemort.store.Store;
import voldemort.store.StoreTimeoutException;
import voldemort.versioning.ObsoleteVersionException;
import voldemort.versioning.VectorClock;
@@ -42,6 +43,8 @@
* features: 1) Per call timeout facility 2) Ability to disable resolution per
* call
*
+ * TODO: Merge this with DefaultStoreClient eventually.
+ *
* @param Type of the Key
* @param Type of the Value
*/
@@ -68,6 +71,17 @@ public DynamicTimeoutStoreClient(String storeName,
bootStrap(clusterXml, storesXml);
}
+ /**
+ * Dummy constructor for Unit test purposes
+ *
+ * @param customStore A custom store object to use for performing the
+ * operations
+ */
+ public DynamicTimeoutStoreClient(Store customStore) {
+ this.store = customStore;
+ this.metadataRefreshAttempts = 1;
+ }
+
// Bootstrap using the given cluster xml and stores xml
// The super class bootStrap() method is used to handle the
// InvalidMetadataException
@@ -76,6 +90,13 @@ public void bootStrap(String customClusterXml, String customStoresXml) {
this.store = factory.getRawStore(storeName, null, customStoresXml, customClusterXml, null);
}
+ /**
+ * Performs a get operation with the specified composite request object
+ *
+ * @param requestWrapper A composite request object containing the key (and
+ * / or default value) and timeout.
+ * @return The Versioned value corresponding to the key
+ */
public Versioned getWithCustomTimeout(CompositeVoldemortRequest requestWrapper) {
validateTimeout(requestWrapper.getRoutingTimeoutInMs());
for(int attempts = 0; attempts < this.metadataRefreshAttempts; attempts++) {
@@ -92,14 +113,21 @@ public Versioned getWithCustomTimeout(CompositeVoldemortRequest request
+ " metadata refresh attempts failed.");
}
+ /**
+ * Performs a put operation with the specified composite request object
+ *
+ * @param requestWrapper A composite request object containing the key and
+ * value
+ * @return Version of the value for the successful put
+ */
public Version putWithCustomTimeout(CompositeVoldemortRequest requestWrapper) {
validateTimeout(requestWrapper.getRoutingTimeoutInMs());
Versioned versioned;
long startTime = System.currentTimeMillis();
// We use the full timeout for doing the Get. In this, we're being
- // optimistic that the subsequent put might be faster all the steps
- // might finish within the alloted time
+ // optimistic that the subsequent put might be faster such that all the
+ // steps might finish within the allotted time
versioned = getWithCustomTimeout(requestWrapper);
long endTime = System.currentTimeMillis();
@@ -119,6 +147,15 @@ public Version putWithCustomTimeout(CompositeVoldemortRequest requestWrapp
(requestWrapper.getRoutingTimeoutInMs() - (endTime - startTime))));
}
+ /**
+ * Performs a Versioned put operation with the specified composite request
+ * object
+ *
+ * @param requestWrapper Composite request object containing the key and the
+ * versioned object
+ * @return Version of the value for the successful put
+ * @throws ObsoleteVersionException
+ */
public Version putVersionedWithCustomTimeout(CompositeVoldemortRequest requestWrapper)
throws ObsoleteVersionException {
validateTimeout(requestWrapper.getRoutingTimeoutInMs());
@@ -136,6 +173,14 @@ public Version putVersionedWithCustomTimeout(CompositeVoldemortRequest req
+ " metadata refresh attempts failed.");
}
+ /**
+ * Performs a get all operation with the specified composite request object
+ *
+ * @param requestWrapper Composite request object containing a reference to
+ * the Iterable keys
+ *
+ * @return Map of the keys to the corresponding versioned values
+ */
public Map<K, Versioned<V>> getAllWithCustomTimeout(CompositeVoldemortRequest requestWrapper) {
validateTimeout(requestWrapper.getRoutingTimeoutInMs());
Map<K, List<Versioned<V>>> items = null;
@@ -161,6 +206,13 @@ public Map> getAllWithCustomTimeout(CompositeVoldemortRequest deleteRequestObject) {
validateTimeout(deleteRequestObject.getRoutingTimeoutInMs());
if(deleteRequestObject.getVersion() == null) {
@@ -194,7 +246,11 @@ public boolean deleteWithCustomTimeout(CompositeVoldemortRequest deleteReq
return store.delete(deleteRequestObject);
}
- // Make sure that the timeout specified is valid
+ /**
+ * Function to check that the timeout specified is valid
+ *
+ * @param opTimeoutInMs The specified timeout in milliseconds
+ */
private void validateTimeout(long opTimeoutInMs) {
if(opTimeoutInMs <= 0) {
throw new IllegalArgumentException("Illegal parameter: Timeout is too low: "
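The put path above budgets one client-supplied timeout across two steps: the preliminary GET (to fetch the current version) is given the full budget, and the PUT only receives whatever is left once the GET returns. A plain-JDK sketch of that arithmetic with made-up numbers (illustrative only); the sleep stands in for the fat client call.

public class TimeoutBudgetSketch {

    public static void main(String[] args) throws InterruptedException {
        long routingTimeoutInMs = 100; // value carried by the composite request

        // Step 1: the GET is optimistically given the full timeout
        long startTime = System.currentTimeMillis();
        Thread.sleep(30); // stand-in for getWithCustomTimeout(...)
        long elapsed = System.currentTimeMillis() - startTime;

        // Step 2: the PUT only receives the remainder of the original budget
        long remainingForPut = routingTimeoutInMs - elapsed;
        if(remainingForPut <= 0) {
            // Mirrors validateTimeout(): a non-positive timeout is rejected outright
            throw new IllegalArgumentException("Illegal parameter: Timeout is too low: " + remainingForPut);
        }
        System.out.println("GET used " + elapsed + " ms, PUT gets " + remainingForPut + " ms");
    }
}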
diff --git a/src/java/voldemort/coordinator/FatClientWrapper.java b/src/java/voldemort/coordinator/FatClientWrapper.java
index 81f0e08b31..c7944402ec 100644
--- a/src/java/voldemort/coordinator/FatClientWrapper.java
+++ b/src/java/voldemort/coordinator/FatClientWrapper.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -16,10 +16,9 @@
package voldemort.coordinator;
-import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.RejectedExecutionHandler;
-import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
@@ -27,10 +26,14 @@
import org.apache.log4j.Logger;
import org.jboss.netty.channel.MessageEvent;
+import voldemort.annotations.jmx.JmxGetter;
+import voldemort.annotations.jmx.JmxManaged;
import voldemort.client.ClientConfig;
import voldemort.client.SocketStoreClientFactory;
import voldemort.store.CompositeVoldemortRequest;
+import voldemort.store.stats.StoreStats;
import voldemort.utils.ByteArray;
+import voldemort.utils.JmxUtils;
/**
* A Wrapper class to provide asynchronous API for calling the fat client
@@ -38,13 +41,17 @@
* of invoking the Fat Client methods on its own
*
*/
+@JmxManaged(description = "A Wrapper for a Fat client in order to execute requests asynchronously")
public class FatClientWrapper {
- private ExecutorService fatClientExecutor;
+ private ThreadPoolExecutor fatClientExecutor;
private SocketStoreClientFactory storeClientFactory;
private DynamicTimeoutStoreClient dynamicTimeoutClient;
- private final CoordinatorConfig config;
+ private final CoordinatorConfig coordinatorConfig;
private final Logger logger = Logger.getLogger(FatClientWrapper.class);
+ private final String storeName;
+ private final CoordinatorErrorStats errorStats;
+ private final StoreStats coordinatorPerfStats;
/**
*
@@ -53,25 +60,27 @@ public class FatClientWrapper {
* @param clientConfig The config used to bootstrap the fat client
* @param storesXml Stores XML used to bootstrap the fat client
* @param clusterXml Cluster XML used to bootstrap the fat client
+ * @param errorStats Stats object used to track all the errors
+ * @param coordinatorPerfStats Stats object used to measure the turnaround time
*/
public FatClientWrapper(String storeName,
CoordinatorConfig config,
ClientConfig clientConfig,
String storesXml,
- String clusterXml) {
+ String clusterXml,
+ CoordinatorErrorStats errorStats,
+ StoreStats coordinatorPerfStats) {
- this.config = config;
+ this.coordinatorConfig = config;
// TODO: Import this from Config
- this.fatClientExecutor = new ThreadPoolExecutor(this.config.getFatClientWrapperCorePoolSize(),
- this.config.getFatClientWrapperMaxPoolSize(),
- this.config.getFatClientWrapperKeepAliveInSecs(), // Keepalive
+ this.fatClientExecutor = new ThreadPoolExecutor(clientConfig.getFatClientWrapperCorePoolSize(),
+ clientConfig.getFatClientWrapperMaxPoolSize(),
+ clientConfig.getFatClientWrapperKeepAliveInSecs(), // Keepalive
TimeUnit.SECONDS, // Keepalive
// Timeunit
- new SynchronousQueue(), // Queue
- // for
- // pending
- // tasks
+ new ArrayBlockingQueue(clientConfig.getFatClientWrapperMaxPoolSize(),
+ true),
new ThreadFactory() {
@@ -95,7 +104,6 @@ public void rejectedExecution(Runnable r,
}
});
- // this.fatClientRequestQueue = new SynchronousQueue();
this.storeClientFactory = new SocketStoreClientFactory(clientConfig);
this.dynamicTimeoutClient = new DynamicTimeoutStoreClient(storeName,
@@ -103,7 +111,24 @@ public void rejectedExecution(Runnable r,
1,
storesXml,
clusterXml);
+ this.errorStats = errorStats;
+ this.coordinatorPerfStats = coordinatorPerfStats;
+ this.storeName = storeName;
+ // Register the Mbean
+ JmxUtils.registerMbean(this,
+ JmxUtils.createObjectName(JmxUtils.getPackageName(this.getClass()),
+ JmxUtils.getClassName(this.getClass())
+ + "-" + storeName));
+
+ }
+
+ public void close() {
+ // Register the Mbean
+ JmxUtils.unregisterMbean(JmxUtils.createObjectName(JmxUtils.getPackageName(this.getClass()),
+ JmxUtils.getClassName(this.getClass())
+ + "-" + this.storeName));
+ this.storeClientFactory.close();
}
/**
@@ -111,22 +136,25 @@ public void rejectedExecution(Runnable r,
*
* @param getRequestObject Contains the key used in the get operation
* @param getRequestMessageEvent MessageEvent to write the response back to
+ * @param startTimestampInNs The start timestamp used to measure turnaround
+ * time
*/
void submitGetRequest(final CompositeVoldemortRequest getRequestObject,
- final MessageEvent getRequestMessageEvent) {
+ final MessageEvent getRequestMessageEvent,
+ long startTimestampInNs) {
try {
this.fatClientExecutor.submit(new HttpGetRequestExecutor(getRequestObject,
getRequestMessageEvent,
- this.dynamicTimeoutClient));
+ this.dynamicTimeoutClient,
+ startTimestampInNs,
+ this.coordinatorPerfStats));
if(logger.isDebugEnabled()) {
logger.debug("Submitted a get request");
}
- // Keep track of this request for monitoring
- // this.fatClientRequestQueue.add(f);
} catch(RejectedExecutionException rej) {
- handleRejectedException(getRequestMessageEvent);
+ handleRejectedException(rej, getRequestMessageEvent);
}
}
@@ -136,50 +164,56 @@ void submitGetRequest(final CompositeVoldemortRequest getRequ
* @param getAllRequestObject Contains the keys used in the getAll operation
* @param getAllRequestMessageEvent MessageEvent to write the response back
* to
+ * @param storeName Name of the store to be specified in the response
+ * (header)
+ * @param startTimestampInNs The start timestamp used to measure turnaround
+ * time
*/
void submitGetAllRequest(final CompositeVoldemortRequest getAllRequestObject,
final MessageEvent getAllRequestMessageEvent,
- final String storeName) {
+ final String storeName,
+ long startTimestampInNs) {
try {
this.fatClientExecutor.submit(new HttpGetAllRequestExecutor(getAllRequestObject,
getAllRequestMessageEvent,
this.dynamicTimeoutClient,
- storeName));
+ storeName,
+ startTimestampInNs,
+ this.coordinatorPerfStats));
if(logger.isDebugEnabled()) {
logger.debug("Submitted a get all request");
}
- // Keep track of this request for monitoring
- // this.fatClientRequestQueue.add(f);
} catch(RejectedExecutionException rej) {
- handleRejectedException(getAllRequestMessageEvent);
+ handleRejectedException(rej, getAllRequestMessageEvent);
}
}
/**
* Interface to perform put operation on the Fat client
*
- * @param key: ByteArray representation of the key to put
- * @param value: value corresponding to the key to put
- * @param putRequest: MessageEvent to write the response on.
- * @param operationTimeoutInMs The timeout value for this operation
+ * @param putRequestObject Request object containing the key and value
+ * @param putRequestMessageEvent MessageEvent to write the response on.
+ * @param startTimestampInNs The start timestamp used to measure turnaround
+ * time
*/
void submitPutRequest(final CompositeVoldemortRequest putRequestObject,
- final MessageEvent putRequest) {
+ final MessageEvent putRequestMessageEvent,
+ long startTimestampInNs) {
try {
this.fatClientExecutor.submit(new HttpPutRequestExecutor(putRequestObject,
- putRequest,
- this.dynamicTimeoutClient));
+ putRequestMessageEvent,
+ this.dynamicTimeoutClient,
+ startTimestampInNs,
+ this.coordinatorPerfStats));
if(logger.isDebugEnabled()) {
logger.debug("Submitted a put request");
}
- // Keep track of this request for monitoring
- // this.fatClientRequestQueue.add(f);
} catch(RejectedExecutionException rej) {
- handleRejectedException(putRequest);
+ handleRejectedException(rej, putRequestMessageEvent);
}
}
@@ -189,37 +223,46 @@ void submitPutRequest(final CompositeVoldemortRequest putRequ
* @param deleteRequestObject Contains the key and the version used in the
* delete operation
* @param deleteRequestEvent MessageEvent to write the response back to
+ * @param startTimestampInNs The start timestamp used to measure turnaround
+ * time
*/
public void submitDeleteRequest(CompositeVoldemortRequest deleteRequestObject,
- MessageEvent deleteRequestEvent) {
+ MessageEvent deleteRequestEvent,
+ long startTimestampInNs) {
try {
this.fatClientExecutor.submit(new HttpDeleteRequestExecutor(deleteRequestObject,
deleteRequestEvent,
- this.dynamicTimeoutClient));
+ this.dynamicTimeoutClient,
+ startTimestampInNs,
+ this.coordinatorPerfStats));
- // Keep track of this request for monitoring
- // this.fatClientRequestQueue.add(f);
} catch(RejectedExecutionException rej) {
- handleRejectedException(deleteRequestEvent);
+ handleRejectedException(rej, deleteRequestEvent);
}
}
// TODO: Add a custom HTTP Error status 429: Too many requests
- private void handleRejectedException(MessageEvent getRequest) {
+ private void handleRejectedException(RejectedExecutionException rej, MessageEvent getRequest) {
+ this.errorStats.reportException(rej);
logger.error("rejected !!!");
getRequest.getChannel().write(null); // Write error back to the thin
// client
- // String errorDescription =
- // "Request queue for store " +
- // this.dynamicTimeoutClient.getStoreName()
- // + " is full !");
- // logger.error(errorDescription);
- // RESTErrorHandler.handleError(REQUEST_TIMEOUT,
- // this.getRequestMessageEvent,
- // false,
- // errorDescription);
}
+ @JmxGetter(name = "numberOfActiveThreads", description = "The number of active Fat client wrapper threads.")
+ public int getNumberOfActiveThreads() {
+ return this.fatClientExecutor.getActiveCount();
+ }
+
+ @JmxGetter(name = "numberOfThreads", description = "The total number of Fat client wrapper threads, active and idle.")
+ public int getNumberOfThreads() {
+ return this.fatClientExecutor.getPoolSize();
+ }
+
+ @JmxGetter(name = "queuedRequests", description = "Number of requests in the Fat client wrapper queue waiting to execute.")
+ public int getQueuedRequests() {
+ return this.fatClientExecutor.getQueue().size();
+ }
}
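The wrapper above replaces the SynchronousQueue hand-off with a bounded, fair ArrayBlockingQueue sized to the pool, so overload shows up as a RejectedExecutionException (reported to errorStats) instead of unbounded queueing. A plain-JDK sketch of that executor shape and the rejection path (illustrative only); the pool size is a placeholder for the values read from ClientConfig.

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class BoundedExecutorSketch {

    public static void main(String[] args) {
        int poolSize = 2; // placeholder for getFatClientWrapperCorePoolSize()/MaxPoolSize()

        ThreadPoolExecutor executor = new ThreadPoolExecutor(poolSize,
                                                             poolSize,
                                                             60,
                                                             TimeUnit.SECONDS,
                                                             new ArrayBlockingQueue<Runnable>(poolSize, true));
        try {
            // Submit more work than the pool plus its queue can hold
            for(int i = 0; i < poolSize * 3; i++) {
                executor.submit(new Runnable() {

                    public void run() {
                        try {
                            Thread.sleep(100);
                        } catch(InterruptedException e) {
                            Thread.currentThread().interrupt();
                        }
                    }
                });
            }
        } catch(RejectedExecutionException rej) {
            // This is where FatClientWrapper.handleRejectedException() would report to errorStats
            System.out.println("Request rejected: " + rej.getMessage());
        } finally {
            executor.shutdown();
        }
    }
}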
diff --git a/src/java/voldemort/coordinator/HttpDeleteRequestExecutor.java b/src/java/voldemort/coordinator/HttpDeleteRequestExecutor.java
index 9009c19d16..c1b2e79aab 100644
--- a/src/java/voldemort/coordinator/HttpDeleteRequestExecutor.java
+++ b/src/java/voldemort/coordinator/HttpDeleteRequestExecutor.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -17,17 +17,13 @@
package voldemort.coordinator;
import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_LENGTH;
-import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TRANSFER_ENCODING;
-import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.NO_CONTENT;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.REQUEST_TIMEOUT;
import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
import org.apache.log4j.Logger;
-import org.jboss.netty.channel.ChannelFuture;
-import org.jboss.netty.channel.ChannelFutureListener;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
import org.jboss.netty.handler.codec.http.HttpResponse;
@@ -35,6 +31,8 @@
import voldemort.VoldemortException;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.store.StoreTimeoutException;
+import voldemort.store.stats.StoreStats;
+import voldemort.store.stats.Tracked;
import voldemort.utils.ByteArray;
/**
@@ -49,6 +47,8 @@ public class HttpDeleteRequestExecutor implements Runnable {
DynamicTimeoutStoreClient storeClient;
private final Logger logger = Logger.getLogger(HttpDeleteRequestExecutor.class);
private final CompositeVoldemortRequest deleteRequestObject;
+ private final long startTimestampInNs;
+ private final StoreStats coordinatorPerfStats;
/**
*
@@ -58,31 +58,37 @@ public class HttpDeleteRequestExecutor implements Runnable {
* error
* @param storeClient Reference to the fat client for performing this Delete
* operation
+ * @param coordinatorPerfStats Stats object used to measure the turnaround
+ * time
+ * @param startTimestampInNs start timestamp of the request
*/
public HttpDeleteRequestExecutor(CompositeVoldemortRequest deleteRequestObject,
MessageEvent requestEvent,
- DynamicTimeoutStoreClient storeClient) {
+ DynamicTimeoutStoreClient storeClient,
+ long startTimestampInNs,
+ StoreStats coordinatorPerfStats) {
this.deleteRequestMessageEvent = requestEvent;
this.storeClient = storeClient;
this.deleteRequestObject = deleteRequestObject;
+ this.startTimestampInNs = startTimestampInNs;
+ this.coordinatorPerfStats = coordinatorPerfStats;
}
public void writeResponse() {
// 1. Create the Response object
- HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
+ HttpResponse response = new DefaultHttpResponse(HTTP_1_1, NO_CONTENT);
// 2. Set the right headers
- response.setHeader(CONTENT_TYPE, "binary");
- response.setHeader(CONTENT_TRANSFER_ENCODING, "binary");
response.setHeader(CONTENT_LENGTH, "0");
- // Write the response to the Netty Channel
- ChannelFuture future = this.deleteRequestMessageEvent.getChannel().write(response);
-
- // Close the non-keep-alive connection after the write operation is
- // done.
- future.addListener(ChannelFutureListener.CLOSE);
+ // Update the stats
+ if(this.coordinatorPerfStats != null) {
+ long durationInNs = System.nanoTime() - startTimestampInNs;
+ this.coordinatorPerfStats.recordTime(Tracked.DELETE, durationInNs);
+ }
+ // Write the response to the Netty Channel
+ this.deleteRequestMessageEvent.getChannel().write(response);
}
@Override
@@ -94,7 +100,6 @@ public void run() {
} else {
RESTErrorHandler.handleError(NOT_FOUND,
this.deleteRequestMessageEvent,
- false,
"Requested Key with the specified version does not exist");
}
@@ -103,14 +108,12 @@ public void run() {
logger.error(errorDescription);
RESTErrorHandler.handleError(REQUEST_TIMEOUT,
this.deleteRequestMessageEvent,
- false,
errorDescription);
} catch(VoldemortException ve) {
ve.printStackTrace();
String errorDescription = "Voldemort Exception: " + ve.getMessage();
RESTErrorHandler.handleError(INTERNAL_SERVER_ERROR,
this.deleteRequestMessageEvent,
- false,
errorDescription);
}
}
diff --git a/src/java/voldemort/coordinator/HttpGetAllRequestExecutor.java b/src/java/voldemort/coordinator/HttpGetAllRequestExecutor.java
index 7ef491e677..c8a774b989 100644
--- a/src/java/voldemort/coordinator/HttpGetAllRequestExecutor.java
+++ b/src/java/voldemort/coordinator/HttpGetAllRequestExecutor.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -44,8 +44,6 @@
import org.codehaus.jackson.map.ObjectMapper;
import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.buffer.ChannelBuffers;
-import org.jboss.netty.channel.ChannelFuture;
-import org.jboss.netty.channel.ChannelFutureListener;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
import org.jboss.netty.handler.codec.http.HttpResponse;
@@ -53,6 +51,8 @@
import voldemort.VoldemortException;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.store.StoreTimeoutException;
+import voldemort.store.stats.StoreStats;
+import voldemort.store.stats.Tracked;
import voldemort.utils.ByteArray;
import voldemort.versioning.VectorClock;
import voldemort.versioning.Versioned;
@@ -70,6 +70,8 @@ public class HttpGetAllRequestExecutor implements Runnable {
private final Logger logger = Logger.getLogger(HttpGetRequestExecutor.class);
private final CompositeVoldemortRequest getAllRequestObject;
private final String storeName;
+ private final long startTimestampInNs;
+ private final StoreStats coordinatorPerfStats;
/**
*
@@ -79,15 +81,24 @@ public class HttpGetAllRequestExecutor implements Runnable {
* error
* @param storeClient Reference to the fat client for performing this Get
* operation
+ * @param storeName Name of the store intended to be included in the
+ * response (content-location)
+ * @param coordinatorPerfStats Stats object used to measure the turnaround
+ * time
+ * @param startTimestampInNs start timestamp of the request
*/
public HttpGetAllRequestExecutor(CompositeVoldemortRequest getAllRequestObject,
MessageEvent requestMessageEvent,
DynamicTimeoutStoreClient storeClient,
- String storeName) {
+ String storeName,
+ long startTimestampInNs,
+ StoreStats coordinatorPerfStats) {
this.getRequestMessageEvent = requestMessageEvent;
this.storeClient = storeClient;
this.getAllRequestObject = getAllRequestObject;
this.storeName = storeName;
+ this.startTimestampInNs = startTimestampInNs;
+ this.coordinatorPerfStats = coordinatorPerfStats;
}
public void writeResponse(Map> responseVersioned) {
@@ -160,13 +171,14 @@ public void writeResponse(Map> responseVersioned) {
response.setContent(responseContent);
response.setHeader(CONTENT_LENGTH, response.getContent().readableBytes());
- // Write the response to the Netty Channel
- ChannelFuture future = this.getRequestMessageEvent.getChannel().write(response);
-
- // Close the non-keep-alive connection after the write operation is
- // done.
- future.addListener(ChannelFutureListener.CLOSE);
+ // Update the stats
+ if(this.coordinatorPerfStats != null) {
+ long durationInNs = System.nanoTime() - startTimestampInNs;
+ this.coordinatorPerfStats.recordTime(Tracked.GET_ALL, durationInNs);
+ }
+ // Write the response to the Netty Channel
+ this.getRequestMessageEvent.getChannel().write(response);
}
@Override
@@ -176,7 +188,6 @@ public void run() {
if(responseVersioned == null) {
RESTErrorHandler.handleError(NOT_FOUND,
this.getRequestMessageEvent,
- false,
"Requested Key does not exist");
}
writeResponse(responseVersioned);
@@ -184,22 +195,17 @@ public void run() {
String errorDescription = "GETALL Failed !!! Illegal Arguments : "
+ illegalArgsException.getMessage();
logger.error(errorDescription);
- RESTErrorHandler.handleError(BAD_REQUEST,
- this.getRequestMessageEvent,
- false,
- errorDescription);
+ RESTErrorHandler.handleError(BAD_REQUEST, this.getRequestMessageEvent, errorDescription);
} catch(StoreTimeoutException timeoutException) {
String errorDescription = "GET Request timed out: " + timeoutException.getMessage();
logger.error(errorDescription);
RESTErrorHandler.handleError(REQUEST_TIMEOUT,
this.getRequestMessageEvent,
- false,
errorDescription);
} catch(VoldemortException ve) {
String errorDescription = "Voldemort Exception: " + ve.getMessage();
RESTErrorHandler.handleError(INTERNAL_SERVER_ERROR,
this.getRequestMessageEvent,
- false,
errorDescription);
}
}
diff --git a/src/java/voldemort/coordinator/HttpGetRequestExecutor.java b/src/java/voldemort/coordinator/HttpGetRequestExecutor.java
index 61c4a543c8..02a1de9a58 100644
--- a/src/java/voldemort/coordinator/HttpGetRequestExecutor.java
+++ b/src/java/voldemort/coordinator/HttpGetRequestExecutor.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -27,12 +27,7 @@
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.REQUEST_TIMEOUT;
import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
-import java.io.IOException;
-
import org.apache.log4j.Logger;
-import org.codehaus.jackson.JsonGenerationException;
-import org.codehaus.jackson.map.JsonMappingException;
-import org.codehaus.jackson.map.ObjectMapper;
import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.buffer.ChannelBuffers;
import org.jboss.netty.channel.MessageEvent;
@@ -42,6 +37,8 @@
import voldemort.VoldemortException;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.store.StoreTimeoutException;
+import voldemort.store.stats.StoreStats;
+import voldemort.store.stats.Tracked;
import voldemort.utils.ByteArray;
import voldemort.versioning.VectorClock;
import voldemort.versioning.Versioned;
@@ -59,6 +56,20 @@ public class HttpGetRequestExecutor implements Runnable {
DynamicTimeoutStoreClient storeClient;
private final Logger logger = Logger.getLogger(HttpGetRequestExecutor.class);
private final CompositeVoldemortRequest getRequestObject;
+ private final long startTimestampInNs;
+ private final StoreStats coordinatorPerfStats;
+
+ /**
+ * Dummy constructor invoked during a Noop Get operation
+ *
+ * @param requestEvent MessageEvent used to write the response
+ */
+ public HttpGetRequestExecutor(MessageEvent requestEvent) {
+ this.getRequestMessageEvent = requestEvent;
+ this.getRequestObject = null;
+ this.startTimestampInNs = 0;
+ this.coordinatorPerfStats = null;
+ }
/**
*
@@ -68,13 +79,20 @@ public class HttpGetRequestExecutor implements Runnable {
* error
* @param storeClient Reference to the fat client for performing this Get
* operation
+ * @param coordinatorPerfStats Stats object used to measure the turnaround
+ * time
+ * @param startTimestampInNs start timestamp of the request
*/
public HttpGetRequestExecutor(CompositeVoldemortRequest getRequestObject,
MessageEvent requestEvent,
- DynamicTimeoutStoreClient storeClient) {
+ DynamicTimeoutStoreClient storeClient,
+ long startTimestampInNs,
+ StoreStats coordinatorPerfStats) {
this.getRequestMessageEvent = requestEvent;
this.storeClient = storeClient;
this.getRequestObject = getRequestObject;
+ this.startTimestampInNs = startTimestampInNs;
+ this.coordinatorPerfStats = coordinatorPerfStats;
}
public void writeResponse(Versioned responseVersioned) {
@@ -87,18 +105,7 @@ public void writeResponse(Versioned responseVersioned) {
this.responseContent.writeBytes(value);
VectorClock vc = (VectorClock) responseVersioned.getVersion();
- VectorClockWrapper vcWrapper = new VectorClockWrapper(vc);
- ObjectMapper mapper = new ObjectMapper();
- String eTag = "";
- try {
- eTag = mapper.writeValueAsString(vcWrapper);
- } catch(JsonGenerationException e) {
- e.printStackTrace();
- } catch(JsonMappingException e) {
- e.printStackTrace();
- } catch(IOException e) {
- e.printStackTrace();
- }
+ String eTag = CoordinatorUtils.getSerializedVectorClock(vc);
if(logger.isDebugEnabled()) {
logger.debug("ETAG : " + eTag);
@@ -120,6 +127,12 @@ public void writeResponse(Versioned responseVersioned) {
logger.debug("Response = " + response);
}
+ // Update the stats
+ if(this.coordinatorPerfStats != null) {
+ long durationInNs = System.nanoTime() - startTimestampInNs;
+ this.coordinatorPerfStats.recordTime(Tracked.GET, durationInNs);
+ }
+
// Write the response to the Netty Channel
this.getRequestMessageEvent.getChannel().write(response);
}
@@ -134,7 +147,6 @@ public void run() {
} else {
RESTErrorHandler.handleError(NOT_FOUND,
this.getRequestMessageEvent,
- false,
"Requested Key does not exist");
}
if(logger.isDebugEnabled()) {
@@ -146,22 +158,17 @@ public void run() {
String errorDescription = "PUT Failed !!! Illegal Arguments : "
+ illegalArgsException.getMessage();
logger.error(errorDescription);
- RESTErrorHandler.handleError(BAD_REQUEST,
- this.getRequestMessageEvent,
- false,
- errorDescription);
+ RESTErrorHandler.handleError(BAD_REQUEST, this.getRequestMessageEvent, errorDescription);
} catch(StoreTimeoutException timeoutException) {
String errorDescription = "GET Request timed out: " + timeoutException.getMessage();
logger.error(errorDescription);
RESTErrorHandler.handleError(REQUEST_TIMEOUT,
this.getRequestMessageEvent,
- false,
errorDescription);
} catch(VoldemortException ve) {
String errorDescription = "Voldemort Exception: " + ve.getMessage();
RESTErrorHandler.handleError(INTERNAL_SERVER_ERROR,
this.getRequestMessageEvent,
- false,
errorDescription);
}
}
diff --git a/src/java/voldemort/coordinator/HttpPutRequestExecutor.java b/src/java/voldemort/coordinator/HttpPutRequestExecutor.java
index ebbc7acc0d..e0ad0f98da 100644
--- a/src/java/voldemort/coordinator/HttpPutRequestExecutor.java
+++ b/src/java/voldemort/coordinator/HttpPutRequestExecutor.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -17,10 +17,10 @@
package voldemort.coordinator;
import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_LENGTH;
-import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE;
+import static org.jboss.netty.handler.codec.http.HttpHeaders.Names.ETAG;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
+import static org.jboss.netty.handler.codec.http.HttpResponseStatus.CREATED;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR;
-import static org.jboss.netty.handler.codec.http.HttpResponseStatus.OK;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.PRECONDITION_FAILED;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.REQUEST_TIMEOUT;
import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
@@ -33,8 +33,11 @@
import voldemort.VoldemortException;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.store.StoreTimeoutException;
+import voldemort.store.stats.StoreStats;
+import voldemort.store.stats.Tracked;
import voldemort.utils.ByteArray;
import voldemort.versioning.ObsoleteVersionException;
+import voldemort.versioning.VectorClock;
/**
* A Runnable class that uses the specified Fat client to perform a Voldemort
@@ -48,10 +51,19 @@ public class HttpPutRequestExecutor implements Runnable {
DynamicTimeoutStoreClient storeClient;
private final Logger logger = Logger.getLogger(HttpPutRequestExecutor.class);
private final CompositeVoldemortRequest putRequestObject;
+ private final long startTimestampInNs;
+ private final StoreStats coordinatorPerfStats;
+ /**
+ * Dummy constructor invoked during a Noop Put operation
+ *
+ * @param requestEvent MessageEvent used to write the response
+ */
public HttpPutRequestExecutor(MessageEvent requestEvent) {
this.putRequestMessageEvent = requestEvent;
this.putRequestObject = null;
+ this.startTimestampInNs = 0;
+ this.coordinatorPerfStats = null;
}
/**
@@ -62,25 +74,44 @@ public HttpPutRequestExecutor(MessageEvent requestEvent) {
* error
* @param storeClient Reference to the fat client for performing this Get
* operation
+ * @param coordinatorPerfStats Stats object used to measure the turnaround
+ * time
+ * @param startTimestampInNs start timestamp of the request
*/
public HttpPutRequestExecutor(CompositeVoldemortRequest putRequestObject,
MessageEvent requestEvent,
- DynamicTimeoutStoreClient storeClient) {
+ DynamicTimeoutStoreClient storeClient,
+ long startTimestampInNs,
+ StoreStats coordinatorPerfStats) {
this.putRequestMessageEvent = requestEvent;
this.storeClient = storeClient;
this.putRequestObject = putRequestObject;
+ this.startTimestampInNs = startTimestampInNs;
+ this.coordinatorPerfStats = coordinatorPerfStats;
}
- public void writeResponse() {
+ public void writeResponse(VectorClock successfulPutVC) {
// 1. Create the Response object
- HttpResponse response = new DefaultHttpResponse(HTTP_1_1, OK);
+ HttpResponse response = new DefaultHttpResponse(HTTP_1_1, CREATED);
- // 2. Set the right headers
- response.setHeader(CONTENT_TYPE, "application/json");
+ String eTag = CoordinatorUtils.getSerializedVectorClock(successfulPutVC);
- // 3. Copy the data into the payload
+ if(logger.isDebugEnabled()) {
+ logger.debug("ETAG : " + eTag);
+ }
+
+ // 2. Set the right headers
+ response.setHeader(ETAG, eTag);
response.setHeader(CONTENT_LENGTH, 0);
+ // TODO: return the Version back to the client
+
+ // Update the stats
+ if(this.coordinatorPerfStats != null) {
+ long durationInNs = System.nanoTime() - startTimestampInNs;
+ this.coordinatorPerfStats.recordTime(Tracked.PUT, durationInNs);
+ }
+
// Write the response to the Netty Channel
this.putRequestMessageEvent.getChannel().write(response);
}
@@ -89,26 +120,27 @@ public void writeResponse() {
public void run() {
try {
- this.storeClient.putWithCustomTimeout(putRequestObject);
+ VectorClock successfulPutVC = null;
+ if(putRequestObject.getValue() != null) {
+ successfulPutVC = (VectorClock) this.storeClient.putVersionedWithCustomTimeout(putRequestObject);
+ } else {
+ successfulPutVC = (VectorClock) this.storeClient.putWithCustomTimeout(putRequestObject);
+ }
if(logger.isDebugEnabled()) {
logger.debug("PUT successful !");
}
- writeResponse();
+ writeResponse(successfulPutVC);
} catch(IllegalArgumentException illegalArgsException) {
String errorDescription = "PUT Failed !!! Illegal Arguments : "
+ illegalArgsException.getMessage();
logger.error(errorDescription);
- RESTErrorHandler.handleError(BAD_REQUEST,
- this.putRequestMessageEvent,
- false,
- errorDescription);
+ RESTErrorHandler.handleError(BAD_REQUEST, this.putRequestMessageEvent, errorDescription);
} catch(ObsoleteVersionException oe) {
String errorDescription = "PUT Failed !!! Obsolete version exception: "
+ oe.getMessage();
RESTErrorHandler.handleError(PRECONDITION_FAILED,
this.putRequestMessageEvent,
- false,
errorDescription);
} catch(StoreTimeoutException timeoutException) {
@@ -116,14 +148,12 @@ public void run() {
logger.error(errorDescription);
RESTErrorHandler.handleError(REQUEST_TIMEOUT,
this.putRequestMessageEvent,
- false,
errorDescription);
} catch(VoldemortException ve) {
String errorDescription = "Voldemort Exception: " + ve.getMessage();
RESTErrorHandler.handleError(INTERNAL_SERVER_ERROR,
this.putRequestMessageEvent,
- false,
errorDescription);
}
}
diff --git a/src/java/voldemort/coordinator/NoopHttpRequestHandler.java b/src/java/voldemort/coordinator/NoopHttpRequestHandler.java
index 7b0574453a..79a07b4a30 100644
--- a/src/java/voldemort/coordinator/NoopHttpRequestHandler.java
+++ b/src/java/voldemort/coordinator/NoopHttpRequestHandler.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -22,8 +22,7 @@
import org.jboss.netty.handler.codec.http.HttpRequest;
import voldemort.common.VoldemortOpCode;
-import voldemort.store.CompositeGetVoldemortRequest;
-import voldemort.utils.ByteArray;
+import voldemort.versioning.VectorClock;
import voldemort.versioning.Versioned;
/**
@@ -43,21 +42,16 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
switch(operationType) {
case VoldemortOpCode.GET_OP_CODE:
- HttpGetRequestExecutor getExecutor = new HttpGetRequestExecutor(new CompositeGetVoldemortRequest(null,
- 0l,
- false),
- e,
- null);
+ HttpGetRequestExecutor getExecutor = new HttpGetRequestExecutor(e);
Versioned responseVersioned = null;
- byte[] nullByteArray = new byte[1];
- nullByteArray[0] = 0;
- responseVersioned = new Versioned(nullByteArray);
+ byte[] sampleByteArray = "a".getBytes();
+ responseVersioned = new Versioned(sampleByteArray);
getExecutor.writeResponse(responseVersioned);
break;
case VoldemortOpCode.PUT_OP_CODE:
HttpPutRequestExecutor putRequestExecutor = new HttpPutRequestExecutor(e);
- putRequestExecutor.writeResponse();
+ putRequestExecutor.writeResponse(new VectorClock());
break;
default:
System.err.println("Illegal operation.");
diff --git a/src/java/voldemort/coordinator/RESTErrorHandler.java b/src/java/voldemort/coordinator/RESTErrorHandler.java
index f83e923248..d57b835aa7 100644
--- a/src/java/voldemort/coordinator/RESTErrorHandler.java
+++ b/src/java/voldemort/coordinator/RESTErrorHandler.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -20,14 +20,14 @@
import static org.jboss.netty.handler.codec.http.HttpVersion.HTTP_1_1;
import org.jboss.netty.buffer.ChannelBuffers;
-import org.jboss.netty.channel.ChannelFuture;
-import org.jboss.netty.channel.ChannelFutureListener;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.handler.codec.http.DefaultHttpResponse;
import org.jboss.netty.handler.codec.http.HttpResponse;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.jboss.netty.util.CharsetUtil;
+import voldemort.VoldemortException;
+
/**
* A Generic class used to propagate the error back to the client over the Netty
* channel
@@ -35,10 +35,15 @@
*/
public class RESTErrorHandler {
- public static void handleError(HttpResponseStatus status,
- MessageEvent e,
- boolean keepAlive,
- String message) {
+ static CoordinatorErrorStats errorStats;
+
+ public static void setErrorStatsHandler(CoordinatorErrorStats errorStatsObj) {
+ errorStats = errorStatsObj;
+ }
+
+ public static void handleError(HttpResponseStatus status, MessageEvent e, String message) {
+ errorStats.reportException(new VoldemortException());
+
// 1. Create the Response object
HttpResponse response = new DefaultHttpResponse(HTTP_1_1, status);
@@ -47,12 +52,6 @@ public static void handleError(HttpResponseStatus status,
+ message + "\r\n", CharsetUtil.UTF_8));
// Write the response to the Netty Channel
- ChannelFuture future = e.getChannel().write(response);
-
- // Close the non-keep-alive connection after the write operation is
- // done.
- if(!keepAlive) {
- future.addListener(ChannelFutureListener.CLOSE);
- }
+ e.getChannel().write(response);
}
}
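
Note that handleError() now reports every failure to a shared CoordinatorErrorStats object before writing the response, so that object must be registered via setErrorStatsHandler() before the first request is served. A minimal wiring sketch, assuming CoordinatorErrorStats has a no-argument constructor and that this runs during coordinator startup (neither is shown in this patch):

    // Hypothetical startup wiring: register the shared error-stats object once,
    // before the Netty pipeline starts dispatching requests.
    CoordinatorErrorStats errorStats = new CoordinatorErrorStats();   // assumed constructor
    RESTErrorHandler.setErrorStatsHandler(errorStats);

    // Callers then report errors without the old keepAlive flag:
    // RESTErrorHandler.handleError(BAD_REQUEST, messageEvent, "Illegal request.");
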
diff --git a/src/java/voldemort/coordinator/VoldemortHttpRequestHandler.java b/src/java/voldemort/coordinator/VoldemortHttpRequestHandler.java
index 0af1a280c0..a6b9553a4a 100644
--- a/src/java/voldemort/coordinator/VoldemortHttpRequestHandler.java
+++ b/src/java/voldemort/coordinator/VoldemortHttpRequestHandler.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2008-2013 LinkedIn, Inc
+ * Copyright 2013 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
@@ -16,7 +16,6 @@
package voldemort.coordinator;
-import static org.jboss.netty.handler.codec.http.HttpHeaders.isKeepAlive;
import static org.jboss.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
import java.io.IOException;
@@ -35,19 +34,19 @@
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
import org.jboss.netty.handler.codec.http.HttpChunk;
-import org.jboss.netty.handler.codec.http.HttpChunkTrailer;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.jboss.netty.handler.codec.http.HttpRequest;
-import org.jboss.netty.util.CharsetUtil;
import voldemort.common.VoldemortOpCode;
import voldemort.store.CompositeDeleteVoldemortRequest;
import voldemort.store.CompositeGetAllVoldemortRequest;
import voldemort.store.CompositeGetVoldemortRequest;
import voldemort.store.CompositePutVoldemortRequest;
+import voldemort.store.CompositeVersionedPutVoldemortRequest;
import voldemort.store.CompositeVoldemortRequest;
import voldemort.utils.ByteArray;
import voldemort.versioning.VectorClock;
+import voldemort.versioning.Versioned;
/**
* A class to handle the HTTP request and execute the same on behalf of the thin
@@ -60,8 +59,6 @@ public class VoldemortHttpRequestHandler extends SimpleChannelUpstreamHandler {
public HttpRequest request;
private boolean readingChunks;
- /** Buffer that stores the response content */
- private final StringBuilder buf = new StringBuilder();
private Map fatClientMap;
private final Logger logger = Logger.getLogger(VoldemortHttpRequestHandler.class);
public static final String X_VOLD_REQUEST_TIMEOUT_MS = "X-VOLD-Request-Timeout-ms";
@@ -70,20 +67,25 @@ public class VoldemortHttpRequestHandler extends SimpleChannelUpstreamHandler {
public static final String CUSTOM_RESOLVING_STRATEGY = "custom";
public static final String DEFAULT_RESOLVING_STRATEGY = "timestamp";
+ private CoordinatorErrorStats errorStats = null;
+
// Implicit constructor defined for the derived classes
public VoldemortHttpRequestHandler() {}
- public VoldemortHttpRequestHandler(Map fatClientMap) {
+ public VoldemortHttpRequestHandler(Map fatClientMap,
+ CoordinatorErrorStats errorStats) {
this.fatClientMap = fatClientMap;
+ this.errorStats = errorStats;
}
/**
- * Function to parse the HTTP headers and build a Voldemort request object
+ * Function to parse (and validate) the HTTP headers and build a Voldemort
+ * request object
*
* @param requestURI URI of the REST request
* @param httpMethod Message Event object used to write the response to
* @param e The REST (Voldemort) operation type
- * @return true if a valid request was received. False otherwise
+ * @return A composite request object corresponding to the incoming request
*/
private CompositeVoldemortRequest parseRequest(String requestURI,
MessageEvent e,
@@ -153,19 +155,23 @@ private CompositeVoldemortRequest parseRequest(String request
return null;
}
byte[] putValue = readValue(content);
- requestWrapper = new CompositePutVoldemortRequest(putKey,
- putValue,
- operationTimeoutInMs);
+ VectorClock putOpVectorClock = getVectorClock(this.request.getHeader(X_VOLD_VECTOR_CLOCK));
+ if(putOpVectorClock != null && putOpVectorClock.getEntries().size() > 0) {
+ requestWrapper = new CompositeVersionedPutVoldemortRequest(putKey,
+ new Versioned(putValue,
+ putOpVectorClock),
+ operationTimeoutInMs);
+ } else {
+ requestWrapper = new CompositePutVoldemortRequest(putKey,
+ putValue,
+ operationTimeoutInMs);
+ }
break;
case VoldemortOpCode.DELETE_OP_CODE:
- VectorClock vc = getVectorClock(this.request.getHeader(X_VOLD_VECTOR_CLOCK));
- if(vc == null) {
- // handleBadRequest(e,
- // "Incorrect vector clock specified in the request");
- }
+ VectorClock deleteOpVectorClock = getVectorClock(this.request.getHeader(X_VOLD_VECTOR_CLOCK));
requestWrapper = new CompositeDeleteVoldemortRequest(keyList.get(0),
- vc,
+ deleteOpVectorClock,
operationTimeoutInMs);
break;
@@ -192,6 +198,8 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
readingChunks = true;
} else {
+ long startTimeStampInNs = System.nanoTime();
+
CompositeVoldemortRequest requestObject = parseRequest(requestURI,
e,
this.request.getMethod());
@@ -205,11 +213,13 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
}
if(storeName == null || fatClientWrapper == null) {
+ this.errorStats.reportException(new IllegalArgumentException());
handleBadRequest(e, "Invalid store name. Critical error.");
return;
}
if(requestObject == null) {
+ this.errorStats.reportException(new IllegalArgumentException());
handleBadRequest(e, "Illegal request.");
return;
}
@@ -219,28 +229,28 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
if(logger.isDebugEnabled()) {
logger.debug("Incoming get request");
}
- fatClientWrapper.submitGetRequest(requestObject, e);
+ fatClientWrapper.submitGetRequest(requestObject, e, startTimeStampInNs);
break;
case VoldemortOpCode.GET_ALL_OP_CODE:
- fatClientWrapper.submitGetAllRequest(requestObject, e, storeName);
+ fatClientWrapper.submitGetAllRequest(requestObject,
+ e,
+ storeName,
+ startTimeStampInNs);
break;
case VoldemortOpCode.PUT_OP_CODE:
if(logger.isDebugEnabled()) {
logger.debug("Incoming put request");
}
- fatClientWrapper.submitPutRequest(requestObject, e);
+ fatClientWrapper.submitPutRequest(requestObject, e, startTimeStampInNs);
break;
case VoldemortOpCode.DELETE_OP_CODE:
- fatClientWrapper.submitDeleteRequest(requestObject, e);
+ fatClientWrapper.submitDeleteRequest(requestObject, e, startTimeStampInNs);
break;
default:
String errorMessage = "Illegal operation.";
logger.error(errorMessage);
- RESTErrorHandler.handleError(BAD_REQUEST,
- e,
- isKeepAlive(request),
- errorMessage);
+ RESTErrorHandler.handleError(BAD_REQUEST, e, errorMessage);
return;
}
@@ -249,23 +259,7 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
HttpChunk chunk = (HttpChunk) e.getMessage();
if(chunk.isLast()) {
readingChunks = false;
- buf.append("END OF CONTENT\r\n");
-
- HttpChunkTrailer trailer = (HttpChunkTrailer) chunk;
- if(!trailer.getHeaderNames().isEmpty()) {
- buf.append("\r\n");
- for(String name: trailer.getHeaderNames()) {
- for(String value: trailer.getHeaders(name)) {
- buf.append("TRAILING HEADER: " + name + " = " + value + "\r\n");
- }
- }
- buf.append("\r\n");
- }
-
- } else {
- buf.append("CHUNK: " + chunk.getContent().toString(CharsetUtil.UTF_8) + "\r\n");
}
-
}
}
@@ -278,6 +272,11 @@ public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) throws Ex
*/
private VectorClock getVectorClock(String vectorClockHeader) {
VectorClock vc = null;
+
+ if(vectorClockHeader == null) {
+ return null;
+ }
+
ObjectMapper mapper = new ObjectMapper();
if(logger.isDebugEnabled()) {
logger.debug("Received vector clock : " + vectorClockHeader);
@@ -312,7 +311,7 @@ private VectorClock getVectorClock(String vectorClockHeader) {
private void handleBadRequest(MessageEvent e, String msg) {
String errorMessage = msg;
logger.error(errorMessage);
- RESTErrorHandler.handleError(BAD_REQUEST, e, false, errorMessage);
+ RESTErrorHandler.handleError(BAD_REQUEST, e, errorMessage);
}
/**
diff --git a/src/java/voldemort/routing/ConsistentRoutingStrategy.java b/src/java/voldemort/routing/ConsistentRoutingStrategy.java
index fa226c6ca5..cb7f48d6ef 100644
--- a/src/java/voldemort/routing/ConsistentRoutingStrategy.java
+++ b/src/java/voldemort/routing/ConsistentRoutingStrategy.java
@@ -113,12 +113,12 @@ public List routeRequest(byte[] key) {
preferenceList.add(partitionToNode[partition]);
}
if(logger.isDebugEnabled()) {
- StringBuilder nodeList = new StringBuilder();
+ List nodeIdList = new ArrayList();
for(int partition: partitionList) {
- nodeList.append(partitionToNode[partition].getId() + ",");
+ nodeIdList.add(partitionToNode[partition].getId());
}
- logger.debug("Key " + ByteUtils.toHexString(key) + " mapped to Nodes [" + nodeList
- + "] Partitions [" + partitionList + "]");
+ logger.debug("Key " + ByteUtils.toHexString(key) + " mapped to Nodes " + nodeIdList
+ + " Partitions " + partitionList);
}
return preferenceList;
}
diff --git a/src/java/voldemort/routing/RoutingStrategy.java b/src/java/voldemort/routing/RoutingStrategy.java
index 827d861657..07c7b9050e 100644
--- a/src/java/voldemort/routing/RoutingStrategy.java
+++ b/src/java/voldemort/routing/RoutingStrategy.java
@@ -49,6 +49,11 @@ public interface RoutingStrategy {
/**
* Get the partition list for the given key.
*
+ * TODO: The naming of this method is confusing: it is simply a wrapper
+ * around {@link RoutingStrategy#getReplicatingPartitionList(int)} that
+ * takes a key, so it would be good to rename it to
+ * getReplicatingPartitionList as well.
+ *
* @param key The key the operation is operating on
* @return The partition list for the given key
*/
diff --git a/src/java/voldemort/utils/StoreInstance.java b/src/java/voldemort/routing/StoreRoutingPlan.java
similarity index 69%
rename from src/java/voldemort/utils/StoreInstance.java
rename to src/java/voldemort/routing/StoreRoutingPlan.java
index bc731b9371..8e890e7c31 100644
--- a/src/java/voldemort/utils/StoreInstance.java
+++ b/src/java/voldemort/routing/StoreRoutingPlan.java
@@ -14,7 +14,7 @@
* the License.
*/
-package voldemort.utils;
+package voldemort.routing;
import java.util.ArrayList;
import java.util.HashMap;
@@ -24,9 +24,13 @@
import voldemort.VoldemortException;
import voldemort.cluster.Cluster;
-import voldemort.routing.RoutingStrategyFactory;
-import voldemort.routing.RoutingStrategyType;
+import voldemort.cluster.Node;
import voldemort.store.StoreDefinition;
+import voldemort.utils.ByteUtils;
+import voldemort.utils.ClusterUtils;
+import voldemort.utils.NodeUtils;
+import voldemort.utils.Pair;
+import voldemort.utils.Utils;
import com.google.common.collect.Lists;
@@ -34,25 +38,23 @@
/**
* This class wraps up a Cluster object and a StoreDefinition. The methods are
- * effectively helper or util style methods for analyzing partitions and so on
- * which are a function of both Cluster and StoreDefinition.
+ * effectively helper or util style methods for querying the routing plan
+ * that a given routing strategy will generate from the store definition and
+ * cluster topology.
*/
-public class StoreInstance {
-
- // TODO: (refactor) Improve upon the name "StoreInstance". Object-oriented
- // meaning of 'instance' is too easily confused with system notion of an
- // "instance of a cluster" (the intended usage in this class name).
+public class StoreRoutingPlan {
private final Cluster cluster;
private final StoreDefinition storeDefinition;
-
private final Map partitionIdToNodeIdMap;
+ private final RoutingStrategy routingStrategy;
- public StoreInstance(Cluster cluster, StoreDefinition storeDefinition) {
+ public StoreRoutingPlan(Cluster cluster, StoreDefinition storeDefinition) {
this.cluster = cluster;
this.storeDefinition = storeDefinition;
-
- partitionIdToNodeIdMap = ClusterUtils.getCurrentPartitionMapping(cluster);
+ this.partitionIdToNodeIdMap = ClusterUtils.getCurrentPartitionMapping(cluster);
+ this.routingStrategy = new RoutingStrategyFactory().updateRoutingStrategy(storeDefinition,
+ cluster);
}
public Cluster getCluster() {
@@ -69,19 +71,28 @@ public StoreDefinition getStoreDefinition() {
* @param masterPartitionId
* @return List of partition IDs that replicate the master partition ID.
*/
- public List getReplicationPartitionList(int masterPartitionId) {
- return new RoutingStrategyFactory().updateRoutingStrategy(storeDefinition, cluster)
- .getReplicatingPartitionList(masterPartitionId);
+ public List getReplicatingPartitionList(int masterPartitionId) {
+ return this.routingStrategy.getReplicatingPartitionList(masterPartitionId);
}
/**
* Determines list of partition IDs that replicate the key.
*
* @param key
- * @return List of partition IDs that replicate the partition ID.
+ * @return List of partition IDs that replicate the given key
*/
- public List getReplicationPartitionList(final byte[] key) {
- return getReplicationPartitionList(getMasterPartitionId(key));
+ public List getReplicatingPartitionList(final byte[] key) {
+ return this.routingStrategy.getPartitionList(key);
+ }
+
+ /**
+ * Determines the list of node ids that the key replicates to
+ *
+ * @param key
+ * @return list of node ids that the key replicates to
+ */
+ public List getReplicationNodeList(final byte[] key) {
+ return NodeUtils.getNodeIds(this.routingStrategy.routeRequest(key));
}
/**
@@ -91,8 +102,7 @@ public List getReplicationPartitionList(final byte[] key) {
* @return
*/
public int getMasterPartitionId(final byte[] key) {
- return new RoutingStrategyFactory().updateRoutingStrategy(storeDefinition, cluster)
- .getMasterPartition(key);
+ return this.routingStrategy.getMasterPartition(key);
}
/**
@@ -113,8 +123,11 @@ public int getNodeIdForPartitionId(int partitionId) {
* @return partitionId if found, otherwise null.
*/
public Integer getNodesPartitionIdForKey(int nodeId, final byte[] key) {
- List partitionIds = getReplicationPartitionList(key);
+ // this is all the partitions the key replicates to.
+ List partitionIds = getReplicatingPartitionList(key);
for(Integer partitionId: partitionIds) {
+ // check which of the replicating partitions belongs to the node in
+ // question
if(getNodeIdForPartitionId(partitionId) == nodeId) {
return partitionId;
}
@@ -147,8 +160,88 @@ private List getNodeIdListForPartitionIdList(List partitionIds
return nodeIds;
}
+ /**
+ * Returns the list of node ids this partition replicates to.
+ *
+ * TODO ideally the {@link RoutingStrategy} should house a routeRequest(int
+ * partition) method
+ *
+ * @param partitionId
+ * @return
+ * @throws VoldemortException
+ */
public List getReplicationNodeList(int partitionId) throws VoldemortException {
- return getNodeIdListForPartitionIdList(getReplicationPartitionList(partitionId));
+ return getNodeIdListForPartitionIdList(getReplicatingPartitionList(partitionId));
+ }
+
+ /**
+ * Given a key that belongs to a given node, returns a number n (< zone
+ * replication factor), such that the given node holds the key as the nth
+ * replica of the given zone.
+ *
+ * e.g.: if the method returns 1, then the given node hosts the key as the
+ * zone secondary in the given zone.
+ *
+ * @param zoneId
+ * @param nodeId
+ * @param key
+ * @return
+ */
+ public int getZoneReplicaType(int zoneId, int nodeId, byte[] key) {
+ List replicatingNodes = this.routingStrategy.routeRequest(key);
+ int zoneReplicaType = -1;
+ for(Node node: replicatingNodes) {
+ // bump up the replica number once you encounter a node in the given
+ // zone
+ if(node.getZoneId() == zoneId) {
+ zoneReplicaType++;
+ }
+ // we are done when we find the given node
+ if(node.getId() == nodeId) {
+ return zoneReplicaType;
+ }
+ }
+ if(zoneReplicaType > -1) {
+ throw new VoldemortException("Node " + nodeId + " not a replica for the key "
+ + ByteUtils.toHexString(key) + " in given zone " + zoneId);
+ } else {
+ throw new VoldemortException("Could not find any replicas for the key "
+ + ByteUtils.toHexString(key) + " in given zone " + zoneId);
+ }
+ }
+
+ /**
+ * Given a key and a replica type n (< zone replication factor), figure out
+ * the node that contains the key as the nth replica in the given zone.
+ *
+ * @param zoneId
+ * @param zoneReplicaType
+ * @param key
+ * @return
+ */
+ public int getZoneReplicaNode(int zoneId, int zoneReplicaType, byte[] key) {
+ List replicatingNodes = this.routingStrategy.routeRequest(key);
+ int zoneReplicaTypeCounter = -1;
+ for(Node node: replicatingNodes) {
+ // bump up the counter if we encounter a replica in the given zone
+ if(node.getZoneId() == zoneId) {
+ zoneReplicaTypeCounter++;
+ }
+ // when the counter matches up with the replicaNumber we need, we
+ // are done.
+ if(zoneReplicaTypeCounter == zoneReplicaType) {
+ return node.getId();
+ }
+ }
+ if(zoneReplicaTypeCounter == -1) {
+ throw new VoldemortException("Could not find any replicas for the key "
+ + ByteUtils.toHexString(key) + " in given zone " + zoneId);
+ } else {
+ throw new VoldemortException("Could not find " + (zoneReplicaType + 1)
+ + " replicas for the key " + ByteUtils.toHexString(key)
+ + " in given zone " + zoneId + ". Only found "
+ + (zoneReplicaTypeCounter + 1));
+ }
}
// TODO: (refactor) Move from static methods to non-static methods that use
@@ -212,9 +305,9 @@ public static boolean checkKeyBelongsToPartition(int nodeId,
cluster)
.getPartitionList(key);
List nodePartitions = cluster.getNodeById(nodeId).getPartitionIds();
- checkResult = StoreInstance.checkKeyBelongsToPartition(keyPartitions,
- nodePartitions,
- replicaToPartitionList);
+ checkResult = StoreRoutingPlan.checkKeyBelongsToPartition(keyPartitions,
+ nodePartitions,
+ replicaToPartitionList);
}
return checkResult;
}
@@ -266,9 +359,9 @@ public static List checkKeyBelongsToPartition(byte[] key,
for(Pair>> stealNodeToMap: stealerNodeToMappingTuples) {
List nodePartitions = cluster.getNodeById(stealNodeToMap.getFirst())
.getPartitionIds();
- if(StoreInstance.checkKeyBelongsToPartition(keyPartitions,
- nodePartitions,
- stealNodeToMap.getSecond())) {
+ if(StoreRoutingPlan.checkKeyBelongsToPartition(keyPartitions,
+ nodePartitions,
+ stealNodeToMap.getSecond())) {
nodesToPush.add(stealNodeToMap.getFirst());
}
}
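
To make the renamed helpers above concrete, here is a short usage sketch of StoreRoutingPlan; the cluster, storeDef and key variables are assumptions and zone id 0 is purely illustrative:

    // Hypothetical usage of the StoreRoutingPlan helpers introduced above.
    StoreRoutingPlan routingPlan = new StoreRoutingPlan(cluster, storeDef);

    // Partition ids and node ids that replicate this key.
    List<Integer> partitionIds = routingPlan.getReplicatingPartitionList(key);
    List<Integer> nodeIds = routingPlan.getReplicationNodeList(key);

    // Zone-aware view: which node holds the key as the zone secondary (n = 1)
    // in zone 0, and what replica type a given node plays for that key there.
    int zoneSecondaryNodeId = routingPlan.getZoneReplicaNode(0, 1, key);
    int zoneReplicaType = routingPlan.getZoneReplicaType(0, zoneSecondaryNodeId, key);
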
diff --git a/src/java/voldemort/server/VoldemortConfig.java b/src/java/voldemort/server/VoldemortConfig.java
index d7eb7bdfb6..83a86e3223 100644
--- a/src/java/voldemort/server/VoldemortConfig.java
+++ b/src/java/voldemort/server/VoldemortConfig.java
@@ -202,6 +202,7 @@ public class VoldemortConfig implements Serializable {
private long streamMaxReadBytesPerSec;
private long streamMaxWriteBytesPerSec;
+ private boolean multiVersionStreamingPutsEnabled;
private int gossipIntervalMs;
private String failureDetectorImplementation;
@@ -223,6 +224,10 @@ public class VoldemortConfig implements Serializable {
private int maxParallelStoresRebalancing;
private boolean rebalancingOptimization;
private boolean usePartitionScanForRebalance;
+ private int maxProxyPutThreads;
+ @Deprecated
+ // Should be removed once the proxy put implementation is stable.
+ private boolean proxyPutsDuringRebalance;
public VoldemortConfig(Properties props) {
this(new Props(props));
@@ -349,6 +354,8 @@ public VoldemortConfig(Props props) {
this.streamMaxReadBytesPerSec = props.getBytes("stream.read.byte.per.sec", 10 * 1000 * 1000);
this.streamMaxWriteBytesPerSec = props.getBytes("stream.write.byte.per.sec",
10 * 1000 * 1000);
+ this.multiVersionStreamingPutsEnabled = props.getBoolean("use.multi.version.streaming.puts",
+ true);
this.socketTimeoutMs = props.getInt("socket.timeout.ms", 5000);
this.socketBufferSize = (int) props.getBytes("socket.buffer.size", 64 * 1024);
@@ -462,6 +469,8 @@ public VoldemortConfig(Props props) {
this.rebalancingOptimization = props.getBoolean("rebalancing.optimization", true);
this.usePartitionScanForRebalance = props.getBoolean("use.partition.scan.for.rebalance",
true);
+ this.maxProxyPutThreads = props.getInt("max.proxy.put.threads", 1);
+ this.proxyPutsDuringRebalance = props.getBoolean("proxy.puts.during.rebalance", true);
this.failureDetectorImplementation = props.getString("failuredetector.implementation",
FailureDetectorConfig.DEFAULT_IMPLEMENTATION_CLASS_NAME);
@@ -1452,6 +1461,25 @@ public long getSlopMaxWriteBytesPerSec() {
return slopMaxWriteBytesPerSec;
}
+ /**
+ * If true, multiple successive versions of the same key will be atomically
+ * written to storage in a single operation. Currently not supported for
+ * MysqlStorageEngine.
+ *
+ *
+ * - Property : "use.multi.version.streaming.puts"
+ * - Default : true
+ *
+ *
+ */
+ public void setMultiVersionStreamingPutsEnabled(boolean multiVersionStreamingPutsEnabled) {
+ this.multiVersionStreamingPutsEnabled = multiVersionStreamingPutsEnabled;
+ }
+
+ public boolean getMultiVersionStreamingPutsEnabled() {
+ return this.multiVersionStreamingPutsEnabled;
+ }
+
/**
* Controls the rate at which the {@link StreamingSlopPusherJob} will send
* slop writes over the wire
@@ -2664,6 +2692,40 @@ public boolean usePartitionScanForRebalance() {
return usePartitionScanForRebalance;
}
+ /**
+ * Total number of threads needed to issue proxy puts during rebalancing
+ *
+ *
+ * - Property :"max.proxy.put.threads"
+ * - Default : 1
+ *
+ */
+ public void setMaxProxyPutThreads(int maxProxyPutThreads) {
+ this.maxProxyPutThreads = maxProxyPutThreads;
+ }
+
+ public int getMaxProxyPutThreads() {
+ return this.maxProxyPutThreads;
+ }
+
+ /**
+ * If set to true, the puts to the new replicas will be relayed back to the
+ * original donor nodes, so that the values still exist on the donors if the
+ * rebalance aborts midway for some reason.
+ *
+ *
+ * - Property :"proxy.puts.during.rebalance"
+ * - Default :false
+ *
+ */
+ public void setProxyPutsDuringRebalance(boolean proxyPutsDuringRebalance) {
+ this.proxyPutsDuringRebalance = proxyPutsDuringRebalance;
+ }
+
+ public boolean getProxyPutsDuringRebalance() {
+ return this.proxyPutsDuringRebalance;
+ }
+
/**
* Enables fast, efficient range scans to be used for rebalancing
*
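
The three new server knobs added above are read through the standard properties mechanism; a minimal sketch of setting them programmatically (the values are illustrative, not recommendations):

    // Hypothetical server configuration exercising the new properties.
    Properties props = new Properties();
    props.setProperty("use.multi.version.streaming.puts", "true"); // buffered, atomic streaming puts
    props.setProperty("max.proxy.put.threads", "4");               // proxy put thread pool size
    props.setProperty("proxy.puts.during.rebalance", "true");      // relay puts back to donor nodes
    VoldemortConfig config = new VoldemortConfig(props);
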
diff --git a/src/java/voldemort/server/http/gui/ReadOnlyStoreManagementServlet.java b/src/java/voldemort/server/http/gui/ReadOnlyStoreManagementServlet.java
index d21094905f..a9bface915 100644
--- a/src/java/voldemort/server/http/gui/ReadOnlyStoreManagementServlet.java
+++ b/src/java/voldemort/server/http/gui/ReadOnlyStoreManagementServlet.java
@@ -196,7 +196,8 @@ private void doSwap(HttpServletRequest req, HttpServletResponse resp) throws IOE
String storeName = getRequired(req, "store");
if(metadataStore != null
- && !metadataStore.getServerState().equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
+ && !metadataStore.getServerStateUnlocked()
+ .equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
throw new ServletException("Voldemort server not in normal state");
}
diff --git a/src/java/voldemort/server/protocol/admin/AdminServiceRequestHandler.java b/src/java/voldemort/server/protocol/admin/AdminServiceRequestHandler.java
index 109afc6276..db0313e8fb 100644
--- a/src/java/voldemort/server/protocol/admin/AdminServiceRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/AdminServiceRequestHandler.java
@@ -38,9 +38,11 @@
import voldemort.client.protocol.pb.VAdminProto;
import voldemort.client.protocol.pb.VAdminProto.RebalancePartitionInfoMap;
import voldemort.client.protocol.pb.VAdminProto.VoldemortAdminRequest;
+import voldemort.client.protocol.pb.VProto.KeyedVersions;
import voldemort.client.rebalance.RebalancePartitionsInfo;
import voldemort.cluster.Cluster;
import voldemort.common.nio.ByteBufferBackedInputStream;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.server.protocol.RequestHandler;
@@ -55,6 +57,7 @@
import voldemort.store.StoreOperationFailureException;
import voldemort.store.backup.NativeBackupable;
import voldemort.store.metadata.MetadataStore;
+import voldemort.store.mysql.MysqlStorageEngine;
import voldemort.store.readonly.FileFetcher;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageEngine;
@@ -68,7 +71,6 @@
import voldemort.utils.Pair;
import voldemort.utils.RebalanceUtils;
import voldemort.utils.ReflectUtils;
-import voldemort.utils.StoreInstance;
import voldemort.utils.Utils;
import voldemort.versioning.ObsoleteVersionException;
import voldemort.versioning.VectorClock;
@@ -321,12 +323,14 @@ public VAdminProto.RebalanceStateChangeResponse handleRebalanceStateChange(VAdmi
Cluster cluster = new ClusterMapper().readCluster(new StringReader(request.getClusterString()));
+ List storeDefs = new StoreDefinitionsMapper().readStoreList(new StringReader(request.getStoresString()));
boolean swapRO = request.getSwapRo();
boolean changeClusterMetadata = request.getChangeClusterMetadata();
boolean changeRebalanceState = request.getChangeRebalanceState();
boolean rollback = request.getRollback();
rebalancer.rebalanceStateChange(cluster,
+ storeDefs,
rebalancePartitionsInfo,
swapRO,
changeClusterMetadata,
@@ -378,7 +382,7 @@ public VAdminProto.AsyncOperationStatusResponse handleRebalanceNode(VAdminProto.
+ metadataStore.getNodeId());
// We should be in rebalancing state to run this function
- if(!metadataStore.getServerState()
+ if(!metadataStore.getServerStateUnlocked()
.equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
response.setError(ProtoUtils.encodeError(errorCodeMapper,
new VoldemortException("Voldemort server "
@@ -548,42 +552,57 @@ public StreamRequestHandler handleFetchPartitionEntries(VAdminProto.FetchPartiti
if(fetchValues) {
if(storageEngine.isPartitionScanSupported() && !fetchOrphaned)
return new PartitionScanFetchEntriesRequestHandler(request,
- metadataStore,
- errorCodeMapper,
- voldemortConfig,
- storeRepository,
- networkClassLoader);
+ metadataStore,
+ errorCodeMapper,
+ voldemortConfig,
+ storeRepository,
+ networkClassLoader);
else
return new FullScanFetchEntriesRequestHandler(request,
- metadataStore,
- errorCodeMapper,
- voldemortConfig,
- storeRepository,
- networkClassLoader);
+ metadataStore,
+ errorCodeMapper,
+ voldemortConfig,
+ storeRepository,
+ networkClassLoader);
} else {
if(storageEngine.isPartitionScanSupported() && !fetchOrphaned)
return new PartitionScanFetchKeysRequestHandler(request,
- metadataStore,
- errorCodeMapper,
- voldemortConfig,
- storeRepository,
- networkClassLoader);
+ metadataStore,
+ errorCodeMapper,
+ voldemortConfig,
+ storeRepository,
+ networkClassLoader);
else
return new FullScanFetchKeysRequestHandler(request,
- metadataStore,
- errorCodeMapper,
- voldemortConfig,
- storeRepository,
- networkClassLoader);
+ metadataStore,
+ errorCodeMapper,
+ voldemortConfig,
+ storeRepository,
+ networkClassLoader);
}
}
public StreamRequestHandler handleUpdatePartitionEntries(VAdminProto.UpdatePartitionEntriesRequest request) {
- return new UpdatePartitionEntriesStreamRequestHandler(request,
- errorCodeMapper,
- voldemortConfig,
- storeRepository,
- networkClassLoader);
+ StorageEngine storageEngine = AdminServiceRequestHandler.getStorageEngine(storeRepository,
+ request.getStore());
+ if(!voldemortConfig.getMultiVersionStreamingPutsEnabled()
+ || storageEngine instanceof MysqlStorageEngine) {
+ // TODO This check is ugly. Need some generic capability to check
+ // which storage engine supports which operations.
+ return new UpdatePartitionEntriesStreamRequestHandler(request,
+ errorCodeMapper,
+ voldemortConfig,
+ storageEngine,
+ storeRepository,
+ networkClassLoader);
+ } else {
+ return new BufferedUpdatePartitionEntriesStreamRequestHandler(request,
+ errorCodeMapper,
+ voldemortConfig,
+ storageEngine,
+ storeRepository,
+ networkClassLoader);
+ }
}
public VAdminProto.AsyncOperationListResponse handleAsyncOperationList(VAdminProto.AsyncOperationListRequest request) {
@@ -708,7 +727,8 @@ public VAdminProto.SwapStoreResponse handleSwapROStore(VAdminProto.SwapStoreRequ
final String storeName = request.getStoreName();
VAdminProto.SwapStoreResponse.Builder response = VAdminProto.SwapStoreResponse.newBuilder();
- if(!metadataStore.getServerState().equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
+ if(!metadataStore.getServerStateUnlocked()
+ .equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
response.setError(ProtoUtils.encodeError(errorCodeMapper,
new VoldemortException("Voldemort server "
+ metadataStore.getNodeId()
@@ -1088,12 +1108,12 @@ public VAdminProto.DeletePartitionEntriesResponse handleDeletePartitionEntries(V
ByteArray key = entry.getFirst();
Versioned value = entry.getSecond();
throttler.maybeThrottle(key.length() + valueSize(value));
- if(StoreInstance.checkKeyBelongsToPartition(metadataStore.getNodeId(),
- key.get(),
- replicaToPartitionList,
- request.hasInitialCluster() ? new ClusterMapper().readCluster(new StringReader(request.getInitialCluster()))
- : metadataStore.getCluster(),
- metadataStore.getStoreDef(storeName))
+ if(StoreRoutingPlan.checkKeyBelongsToPartition(metadataStore.getNodeId(),
+ key.get(),
+ replicaToPartitionList,
+ request.hasInitialCluster() ? new ClusterMapper().readCluster(new StringReader(request.getInitialCluster()))
+ : metadataStore.getCluster(),
+ metadataStore.getStoreDef(storeName))
&& filter.accept(key, value)) {
if(storageEngine.delete(key, value.getVersion())) {
deleteSuccess++;
@@ -1124,23 +1144,32 @@ public VAdminProto.DeletePartitionEntriesResponse handleDeletePartitionEntries(V
public VAdminProto.UpdateMetadataResponse handleUpdateMetadata(VAdminProto.UpdateMetadataRequest request) {
VAdminProto.UpdateMetadataResponse.Builder response = VAdminProto.UpdateMetadataResponse.newBuilder();
+ metadataStore.writeLock.lock();
try {
- ByteArray key = ProtoUtils.decodeBytes(request.getKey());
- String keyString = ByteUtils.getString(key.get(), "UTF-8");
- if(MetadataStore.METADATA_KEYS.contains(keyString)) {
- Versioned versionedValue = ProtoUtils.decodeVersioned(request.getVersioned());
- logger.info("Updating metadata for key '" + keyString + "'");
- metadataStore.put(new ByteArray(ByteUtils.getBytes(keyString, "UTF-8")),
- versionedValue,
- null);
- logger.info("Successfully updated metadata for key '" + keyString + "'");
+ for(KeyedVersions keyValue: request.getMetadataEntryList()) {
+
+ try {
+ ByteArray key = ProtoUtils.decodeBytes(keyValue.getKey());
+ String keyString = ByteUtils.getString(key.get(), "UTF-8");
+ if(MetadataStore.METADATA_KEYS.contains(keyString)) {
+ Versioned versionedValue = ProtoUtils.decodeVersionedMetadataKeyValue(keyValue);
+ logger.info("Updating metadata for key '" + keyString + "'");
+ metadataStore.put(new ByteArray(ByteUtils.getBytes(keyString, "UTF-8")),
+ versionedValue,
+ null);
+ logger.info("Successfully updated metadata for key '" + keyString + "'");
+ }
+ } catch(VoldemortException e) {
+ response.setError(ProtoUtils.encodeError(errorCodeMapper, e));
+ logger.error("handleUpdateMetadata failed for request(" + request.toString()
+ + ")", e);
+ }
}
- } catch(VoldemortException e) {
- response.setError(ProtoUtils.encodeError(errorCodeMapper, e));
- logger.error("handleUpdateMetadata failed for request(" + request.toString() + ")", e);
- }
- return response.build();
+ return response.build();
+ } finally {
+ metadataStore.writeLock.unlock();
+ }
}
public VAdminProto.GetMetadataResponse handleGetMetadata(VAdminProto.GetMetadataRequest request) {
@@ -1190,7 +1219,8 @@ public VAdminProto.DeleteStoreResponse handleDeleteStore(VAdminProto.DeleteStore
VAdminProto.DeleteStoreResponse.Builder response = VAdminProto.DeleteStoreResponse.newBuilder();
// don't try to delete a store in the middle of rebalancing
- if(!metadataStore.getServerState().equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
+ if(!metadataStore.getServerStateUnlocked()
+ .equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
response.setError(ProtoUtils.encodeError(errorCodeMapper,
new VoldemortException("Voldemort server is not in normal state")));
return response.build();
@@ -1267,7 +1297,8 @@ public VAdminProto.AddStoreResponse handleAddStore(VAdminProto.AddStoreRequest r
VAdminProto.AddStoreResponse.Builder response = VAdminProto.AddStoreResponse.newBuilder();
// don't try to add a store when not in normal state
- if(!metadataStore.getServerState().equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
+ if(!metadataStore.getServerStateUnlocked()
+ .equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
response.setError(ProtoUtils.encodeError(errorCodeMapper,
new VoldemortException("Voldemort server is not in normal state")));
return response.build();
diff --git a/src/java/voldemort/server/protocol/admin/AsyncOperationService.java b/src/java/voldemort/server/protocol/admin/AsyncOperationService.java
index de0ad16603..fda2f186bf 100644
--- a/src/java/voldemort/server/protocol/admin/AsyncOperationService.java
+++ b/src/java/voldemort/server/protocol/admin/AsyncOperationService.java
@@ -1,12 +1,12 @@
/*
* Copyright 2008-2010 LinkedIn, Inc
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -16,10 +16,13 @@
package voldemort.server.protocol.admin;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
-import com.google.common.collect.ImmutableSet;
import org.apache.log4j.Logger;
import voldemort.VoldemortException;
@@ -29,11 +32,14 @@
import voldemort.common.service.SchedulerService;
import voldemort.common.service.ServiceType;
+import com.google.common.collect.ImmutableSet;
+
/**
* Asynchronous job scheduler for admin service operations.
- *
- * TODO: requesting a unique id, then creating an operation with that id seems like a bad API design.
- *
+ *
+ * TODO: requesting a unique id, then creating an operation with that id seems
+ * like a bad API design.
+ *
*/
@JmxManaged(description = "Asynchronous operation execution")
public class AsyncOperationService extends AbstractService {
@@ -96,6 +102,19 @@ public String getStatus(int id) {
}
}
+ public List getMatchingAsyncOperationList(String jobDescPattern, boolean showCompleted) {
+ List operationIds = getAsyncOperationList(showCompleted);
+ List matchingOperationIds = new ArrayList(operationIds.size());
+ for(Integer operationId: operationIds) {
+ AsyncOperation operation = operations.get(operationId);
+ String operationDescription = operation.getStatus().getDescription();
+ if(operationDescription != null && operationDescription.indexOf(jobDescPattern) != -1) {
+ matchingOperationIds.add(operationId);
+ }
+ }
+ return matchingOperationIds;
+ }
+
@JmxOperation(description = "Retrieve all operations")
public String getAllAsyncOperations() {
String result;
@@ -108,23 +127,25 @@ public String getAllAsyncOperations() {
}
/**
- * Get list of asynchronous operations on this node. By default, only the pending
- * operations are returned.
+ * Get list of asynchronous operations on this node. By default, only the
+ * pending operations are returned.
+ *
* @param showCompleted Show completed operations
* @return A list of operation ids.
*/
public List getAsyncOperationList(boolean showCompleted) {
/**
- * Create a copy using an immutable set to avoid a {@link java.util.ConcurrentModificationException}
+ * Create a copy using an immutable set to avoid a
+ * {@link java.util.ConcurrentModificationException}
*/
Set keySet = ImmutableSet.copyOf(operations.keySet());
- if (showCompleted)
+ if(showCompleted)
return new ArrayList(keySet);
List keyList = new ArrayList();
- for (int key: keySet) {
- if (!operations.get(key).getStatus().isComplete())
+ for(int key: keySet) {
+ if(!operations.get(key).getStatus().isComplete())
keyList.add(key);
}
return keyList;
@@ -142,7 +163,7 @@ public AsyncOperationStatus getOperationStatus(int requestId) {
public String stopAsyncOperation(int requestId) {
try {
stopOperation(requestId);
- } catch (VoldemortException e) {
+ } catch(VoldemortException e) {
return e.getMessage();
}
@@ -158,6 +179,7 @@ public void stopOperation(int requestId) {
/**
* Generate a unique request id
+ *
* @return A new, guaranteed unique, request id
*/
public int getUniqueRequestId() {
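
The new getMatchingAsyncOperationList() is a simple substring filter over operation descriptions; a hedged usage sketch (the asyncService variable and the "Rebalance" pattern are assumptions):

    // Hypothetically list all pending async jobs whose description mentions "Rebalance".
    List<Integer> rebalanceJobIds = asyncService.getMatchingAsyncOperationList("Rebalance", false);
    for(int requestId: rebalanceJobIds) {
        logger.info("Pending job " + requestId + " : " + asyncService.getStatus(requestId));
    }
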
diff --git a/src/java/voldemort/server/protocol/admin/BufferedUpdatePartitionEntriesStreamRequestHandler.java b/src/java/voldemort/server/protocol/admin/BufferedUpdatePartitionEntriesStreamRequestHandler.java
new file mode 100644
index 0000000000..11f97871af
--- /dev/null
+++ b/src/java/voldemort/server/protocol/admin/BufferedUpdatePartitionEntriesStreamRequestHandler.java
@@ -0,0 +1,207 @@
+package voldemort.server.protocol.admin;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import voldemort.VoldemortException;
+import voldemort.client.protocol.pb.ProtoUtils;
+import voldemort.client.protocol.pb.VAdminProto;
+import voldemort.client.protocol.pb.VAdminProto.UpdatePartitionEntriesRequest;
+import voldemort.server.StoreRepository;
+import voldemort.server.VoldemortConfig;
+import voldemort.store.ErrorCodeMapper;
+import voldemort.store.StorageEngine;
+import voldemort.store.StoreUtils;
+import voldemort.store.stats.StreamingStats.Operation;
+import voldemort.utils.ByteArray;
+import voldemort.utils.ByteUtils;
+import voldemort.utils.NetworkClassLoader;
+import voldemort.utils.Utils;
+import voldemort.versioning.Versioned;
+
+/**
+ * The buffering ensures that if the stream contains multiple versions for the
+ * same key, storage is updated with all the versions atomically, so that a
+ * client never reads a partial set of versions at any point.
+ *
+ */
+class BufferedUpdatePartitionEntriesStreamRequestHandler extends
+ UpdatePartitionEntriesStreamRequestHandler {
+
+ private static final int VALS_BUFFER_EXPECTED_SIZE = 5;
+ /**
+ * Current key being buffered.
+ */
+ private ByteArray currBufferedKey;
+
+ private List> currBufferedVals;
+
+ public BufferedUpdatePartitionEntriesStreamRequestHandler(UpdatePartitionEntriesRequest request,
+ ErrorCodeMapper errorCodeMapper,
+ VoldemortConfig voldemortConfig,
+ StorageEngine storageEngine,
+ StoreRepository storeRepository,
+ NetworkClassLoader networkClassLoader) {
+ super(request,
+ errorCodeMapper,
+ voldemortConfig,
+ storageEngine,
+ storeRepository,
+ networkClassLoader);
+ currBufferedKey = null;
+ currBufferedVals = new ArrayList>(VALS_BUFFER_EXPECTED_SIZE);
+ }
+
+ @Override
+ protected void finalize() {
+ super.finalize();
+ /*
+ * Also check if we have any pending values being buffered. If so, flush
+ * them to storage.
+ */
+ writeBufferedValsToStorageIfAny();
+ }
+
+ /**
+ * Persists the current set of versions buffered for the current key into
+ * storage, using the multiVersionPut api
+ *
+ * NOTE: The stream may have broken off with more versions still pending.
+ * For now, we simply commit what we have to disk; a better design would
+ * rely on in-stream markers to trigger the flush to storage.
+ */
+ private void writeBufferedValsToStorage() {
+ long startNs = System.nanoTime();
+
+ List> obsoleteVals = storageEngine.multiVersionPut(currBufferedKey,
+ currBufferedVals);
+ currBufferedVals = new ArrayList>(VALS_BUFFER_EXPECTED_SIZE);
+ if(streamStats != null) {
+ streamStats.reportStorageTime(Operation.UPDATE_ENTRIES,
+ Utils.elapsedTimeNs(startNs, System.nanoTime()));
+ streamStats.reportStreamingPut(Operation.UPDATE_ENTRIES);
+ }
+
+ if(logger.isTraceEnabled())
+ logger.trace("updateEntries (Streaming multi-version-put) successful");
+
+ // log Obsolete versions in debug mode
+ if(logger.isDebugEnabled() && obsoleteVals.size() > 0) {
+ logger.debug("updateEntries (Streaming multi-version-put) rejected these versions as obsolete : "
+ + StoreUtils.getVersions(obsoleteVals) + " for key " + currBufferedKey);
+ }
+
+ // log progress
+ counter++;
+ if(0 == counter % STAT_RECORDS_INTERVAL) {
+ long totalTime = (System.currentTimeMillis() - startTime) / 1000;
+
+ logger.info("Update entries updated " + counter + " entries for store '"
+ + storageEngine.getName() + "' in " + totalTime + " s");
+ }
+
+ // throttling
+ int totalValueSize = 0;
+ for(Versioned value: currBufferedVals) {
+ totalValueSize += AdminServiceRequestHandler.valueSize(value);
+ }
+ throttler.maybeThrottle(currBufferedKey.length() + totalValueSize);
+ }
+
+ private void writeBufferedValsToStorageIfAny() {
+ if(currBufferedVals.size() > 0) {
+ writeBufferedValsToStorage();
+ }
+ }
+
+ @Override
+ public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
+ DataOutputStream outputStream)
+ throws IOException {
+ long startNs = System.nanoTime();
+ if(request == null) {
+ int size = 0;
+ try {
+ size = inputStream.readInt();
+ } catch(EOFException e) {
+ if(logger.isTraceEnabled())
+ logger.trace("Incomplete read for message size");
+ if(streamStats != null)
+ streamStats.reportNetworkTime(Operation.UPDATE_ENTRIES,
+ Utils.elapsedTimeNs(startNs, System.nanoTime()));
+ return StreamRequestHandlerState.INCOMPLETE_READ;
+ }
+
+ if(size == -1) {
+ long totalTime = (System.currentTimeMillis() - startTime) / 1000;
+ logger.info("Update entries successfully updated " + counter
+ + " entries for store '" + storageEngine.getName() + "' in "
+ + totalTime + " s");
+ // Write the last buffered key to storage
+ writeBufferedValsToStorage();
+ if(logger.isTraceEnabled())
+ logger.trace("Message size -1, completed partition update");
+ if(streamStats != null)
+ streamStats.reportNetworkTime(Operation.UPDATE_ENTRIES,
+ Utils.elapsedTimeNs(startNs, System.nanoTime()));
+ return StreamRequestHandlerState.COMPLETE;
+ }
+
+ if(logger.isTraceEnabled())
+ logger.trace("UpdatePartitionEntriesRequest message size: " + size);
+
+ byte[] input = new byte[size];
+
+ try {
+ ByteUtils.read(inputStream, input);
+ } catch(EOFException e) {
+ if(logger.isTraceEnabled())
+ logger.trace("Incomplete read for message");
+
+ return StreamRequestHandlerState.INCOMPLETE_READ;
+ } finally {
+ if(streamStats != null)
+ streamStats.reportNetworkTime(Operation.UPDATE_ENTRIES,
+ Utils.elapsedTimeNs(startNs, System.nanoTime()));
+ }
+
+ VAdminProto.UpdatePartitionEntriesRequest.Builder builder = VAdminProto.UpdatePartitionEntriesRequest.newBuilder();
+ builder.mergeFrom(input);
+ request = builder.build();
+ }
+
+ VAdminProto.PartitionEntry partitionEntry = request.getPartitionEntry();
+ ByteArray key = ProtoUtils.decodeBytes(partitionEntry.getKey());
+ Versioned value = ProtoUtils.decodeVersioned(partitionEntry.getVersioned());
+
+ if(filter.accept(key, value)) {
+ // Check if the current key is the same as the one before.
+ if(currBufferedKey != null && !key.equals(currBufferedKey)) {
+ // If not, write the buffered values for the previous key to storage
+ writeBufferedValsToStorage();
+ }
+ currBufferedKey = key;
+ currBufferedVals.add(value);
+ }
+
+ request = null;
+ return StreamRequestHandlerState.READING;
+ }
+
+ @Override
+ public void close(DataOutputStream outputStream) throws IOException {
+ writeBufferedValsToStorageIfAny();
+ super.close(outputStream);
+ }
+
+ @Override
+ public void handleError(DataOutputStream outputStream, VoldemortException e) throws IOException {
+ writeBufferedValsToStorageIfAny();
+ super.handleError(outputStream, e);
+ }
+}
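
The essence of the buffered handler is: accumulate versions while the key stays the same, and flush them with a single multiVersionPut when the key changes or the stream ends. A stripped-down illustration of that grouping, independent of the Netty/protobuf plumbing (streamedEntries and storageEngine are assumptions):

    // Hypothetical, simplified form of the buffering done by the handler above:
    // consecutive versions of the same key are grouped and written atomically.
    ByteArray bufferedKey = null;
    List<Versioned<byte[]>> bufferedVals = new ArrayList<Versioned<byte[]>>();
    for(Pair<ByteArray, Versioned<byte[]>> entry: streamedEntries) {
        if(bufferedKey != null && !entry.getFirst().equals(bufferedKey)) {
            storageEngine.multiVersionPut(bufferedKey, bufferedVals);  // flush previous key
            bufferedVals = new ArrayList<Versioned<byte[]>>();
        }
        bufferedKey = entry.getFirst();
        bufferedVals.add(entry.getSecond());
    }
    if(!bufferedVals.isEmpty()) {
        storageEngine.multiVersionPut(bufferedKey, bufferedVals);      // flush the last key
    }
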
diff --git a/src/java/voldemort/server/protocol/admin/FetchStreamRequestHandler.java b/src/java/voldemort/server/protocol/admin/FetchStreamRequestHandler.java
index 4bb19ce66b..41e6b8651d 100644
--- a/src/java/voldemort/server/protocol/admin/FetchStreamRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/FetchStreamRequestHandler.java
@@ -30,6 +30,7 @@
import voldemort.client.protocol.pb.ProtoUtils;
import voldemort.client.protocol.pb.VAdminProto;
import voldemort.cluster.Cluster;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.server.protocol.StreamRequestHandler;
@@ -42,7 +43,6 @@
import voldemort.utils.ByteArray;
import voldemort.utils.EventThrottler;
import voldemort.utils.NetworkClassLoader;
-import voldemort.utils.StoreInstance;
import voldemort.utils.Time;
import voldemort.xml.ClusterMapper;
@@ -90,7 +90,7 @@ public abstract class FetchStreamRequestHandler implements StreamRequestHandler
protected boolean fetchOrphaned;
- protected final StoreInstance storeInstance;
+ protected final StoreRoutingPlan storeInstance;
protected FetchStreamRequestHandler(VAdminProto.FetchPartitionEntriesRequest request,
MetadataStore metadataStore,
@@ -119,7 +119,7 @@ protected FetchStreamRequestHandler(VAdminProto.FetchPartitionEntriesRequest req
} else {
this.initialCluster = metadataStore.getCluster();
}
- this.storeInstance = new StoreInstance(this.initialCluster, this.storeDef);
+ this.storeInstance = new StoreRoutingPlan(this.initialCluster, this.storeDef);
this.throttler = new EventThrottler(voldemortConfig.getStreamMaxReadBytesPerSec());
if(request.hasFilter()) {
diff --git a/src/java/voldemort/server/protocol/admin/FullScanFetchStreamRequestHandler.java b/src/java/voldemort/server/protocol/admin/FullScanFetchStreamRequestHandler.java
index 7d63e1baab..0fbbbf77d2 100644
--- a/src/java/voldemort/server/protocol/admin/FullScanFetchStreamRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/FullScanFetchStreamRequestHandler.java
@@ -23,6 +23,7 @@
import java.util.Set;
import voldemort.client.protocol.pb.VAdminProto.FetchPartitionEntriesRequest;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.store.ErrorCodeMapper;
@@ -31,7 +32,6 @@
import voldemort.utils.ByteArray;
import voldemort.utils.ClosableIterator;
import voldemort.utils.NetworkClassLoader;
-import voldemort.utils.StoreInstance;
import voldemort.utils.Utils;
/**
@@ -102,7 +102,7 @@ private Integer getKeyPartitionId(byte[] key) {
* @return true iff key is needed.
*/
protected boolean isKeyNeeded(byte[] key) {
- if(!StoreInstance.checkKeyBelongsToPartition(nodeId,
+ if(!StoreRoutingPlan.checkKeyBelongsToPartition(nodeId,
key,
replicaToPartitionList,
initialCluster,
@@ -134,7 +134,7 @@ protected boolean isItemAccepted(byte[] key) {
entryAccepted = true;
}
} else {
- if(!StoreInstance.checkKeyBelongsToNode(key, nodeId, initialCluster, storeDef)) {
+ if(!StoreRoutingPlan.checkKeyBelongsToNode(key, nodeId, initialCluster, storeDef)) {
entryAccepted = true;
}
}
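
For reference, isKeyNeeded() above is what gates the full scan: every scanned key is passed through a static ownership check (now provided by StoreRoutingPlan) and only accepted keys are streamed back. A rough sketch of that scan-and-filter shape, with a hypothetical KeyOwnershipCheck standing in for the StoreRoutingPlan helpers:

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    final class FullScanFilter {

        // Stand-in for StoreRoutingPlan.checkKeyBelongsToPartition(...) / checkKeyBelongsToNode(...)
        interface KeyOwnershipCheck {
            boolean isNeeded(byte[] key);
        }

        static List<byte[]> collectNeededKeys(Iterator<byte[]> allKeys, KeyOwnershipCheck check) {
            List<byte[]> needed = new ArrayList<byte[]>();
            while(allKeys.hasNext()) {
                byte[] key = allKeys.next();
                if(check.isNeeded(key)) {
                    needed.add(key);
                }
            }
            return needed;
        }
    }
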
diff --git a/src/java/voldemort/server/protocol/admin/PartitionScanFetchEntriesRequestHandler.java b/src/java/voldemort/server/protocol/admin/PartitionScanFetchEntriesRequestHandler.java
index 0a1ff69d7b..b496f51574 100644
--- a/src/java/voldemort/server/protocol/admin/PartitionScanFetchEntriesRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/PartitionScanFetchEntriesRequestHandler.java
@@ -23,6 +23,7 @@
import voldemort.client.protocol.pb.ProtoUtils;
import voldemort.client.protocol.pb.VAdminProto;
import voldemort.client.protocol.pb.VAdminProto.FetchPartitionEntriesRequest;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.store.ErrorCodeMapper;
@@ -32,7 +33,6 @@
import voldemort.utils.ClosableIterator;
import voldemort.utils.NetworkClassLoader;
import voldemort.utils.Pair;
-import voldemort.utils.StoreInstance;
import voldemort.versioning.Versioned;
import com.google.protobuf.Message;
@@ -86,7 +86,7 @@ public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
// Check whether the current node contains the partition as the
// requested replica type
if(!fetchedPartitions.contains(currentPartition)
- && StoreInstance.checkPartitionBelongsToNode(currentPartition,
+ && StoreRoutingPlan.checkPartitionBelongsToNode(currentPartition,
currentReplicaType,
nodeId,
initialCluster,
diff --git a/src/java/voldemort/server/protocol/admin/PartitionScanFetchKeysRequestHandler.java b/src/java/voldemort/server/protocol/admin/PartitionScanFetchKeysRequestHandler.java
index 351335b4fc..afdd7f789f 100644
--- a/src/java/voldemort/server/protocol/admin/PartitionScanFetchKeysRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/PartitionScanFetchKeysRequestHandler.java
@@ -23,6 +23,7 @@
import voldemort.client.protocol.pb.ProtoUtils;
import voldemort.client.protocol.pb.VAdminProto;
import voldemort.client.protocol.pb.VAdminProto.FetchPartitionEntriesRequest;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.store.ErrorCodeMapper;
@@ -31,7 +32,6 @@
import voldemort.utils.ByteArray;
import voldemort.utils.ClosableIterator;
import voldemort.utils.NetworkClassLoader;
-import voldemort.utils.StoreInstance;
import com.google.protobuf.Message;
@@ -84,7 +84,7 @@ public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
// Check whether the current node contains the partition as the
// requested replica type
if(!fetchedPartitions.contains(currentPartition)
- && StoreInstance.checkPartitionBelongsToNode(currentPartition,
+ && StoreRoutingPlan.checkPartitionBelongsToNode(currentPartition,
currentReplicaType,
nodeId,
initialCluster,
diff --git a/src/java/voldemort/server/protocol/admin/UpdatePartitionEntriesStreamRequestHandler.java b/src/java/voldemort/server/protocol/admin/UpdatePartitionEntriesStreamRequestHandler.java
index 53ae284fdd..c8f22085c6 100644
--- a/src/java/voldemort/server/protocol/admin/UpdatePartitionEntriesStreamRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/UpdatePartitionEntriesStreamRequestHandler.java
@@ -36,38 +36,38 @@
public class UpdatePartitionEntriesStreamRequestHandler implements StreamRequestHandler {
- private VAdminProto.UpdatePartitionEntriesRequest request;
+ protected VAdminProto.UpdatePartitionEntriesRequest request;
- private final VAdminProto.UpdatePartitionEntriesResponse.Builder responseBuilder = VAdminProto.UpdatePartitionEntriesResponse.newBuilder();
+ protected final VAdminProto.UpdatePartitionEntriesResponse.Builder responseBuilder = VAdminProto.UpdatePartitionEntriesResponse.newBuilder();
- private final ErrorCodeMapper errorCodeMapper;
+ protected final ErrorCodeMapper errorCodeMapper;
- private final EventThrottler throttler;
+ protected final EventThrottler throttler;
- private final VoldemortFilter filter;
+ protected final VoldemortFilter filter;
- private final StorageEngine<ByteArray, byte[], byte[]> storageEngine;
+ protected final StorageEngine<ByteArray, byte[], byte[]> storageEngine;
- private int counter;
+ protected int counter;
- private final long startTime;
+ protected final long startTime;
- private final StreamingStats streamStats;
+ protected final StreamingStats streamStats;
- private final Logger logger = Logger.getLogger(getClass());
+ protected final Logger logger = Logger.getLogger(getClass());
- private AtomicBoolean isBatchWriteOff;
+ protected AtomicBoolean isBatchWriteOff;
public UpdatePartitionEntriesStreamRequestHandler(UpdatePartitionEntriesRequest request,
ErrorCodeMapper errorCodeMapper,
VoldemortConfig voldemortConfig,
+ StorageEngine<ByteArray, byte[], byte[]> storageEngine,
StoreRepository storeRepository,
NetworkClassLoader networkClassLoader) {
super();
this.request = request;
this.errorCodeMapper = errorCodeMapper;
- storageEngine = AdminServiceRequestHandler.getStorageEngine(storeRepository,
- request.getStore());
+ this.storageEngine = storageEngine;
throttler = new EventThrottler(voldemortConfig.getStreamMaxReadBytesPerSec());
filter = (request.hasFilter()) ? AdminServiceRequestHandler.getFilterFromRequest(request.getFilter(),
voldemortConfig,
@@ -92,13 +92,13 @@ protected void finalize() {
storageEngine.endBatchModifications();
}
+ @Override
public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
DataOutputStream outputStream)
throws IOException {
long startNs = System.nanoTime();
if(request == null) {
int size = 0;
-
try {
size = inputStream.readInt();
} catch(EOFException e) {
@@ -187,16 +187,19 @@ public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
return StreamRequestHandlerState.READING;
}
+ @Override
public StreamRequestDirection getDirection() {
return StreamRequestDirection.READING;
}
+ @Override
public void close(DataOutputStream outputStream) throws IOException {
ProtoUtils.writeMessage(outputStream, responseBuilder.build());
storageEngine.endBatchModifications();
isBatchWriteOff.compareAndSet(false, true);
}
+ @Override
public void handleError(DataOutputStream outputStream, VoldemortException e) throws IOException {
responseBuilder.setError(ProtoUtils.encodeError(errorCodeMapper, e));
if(logger.isEnabledFor(Level.ERROR))
diff --git a/src/java/voldemort/server/protocol/admin/UpdateSlopEntriesRequestHandler.java b/src/java/voldemort/server/protocol/admin/UpdateSlopEntriesRequestHandler.java
index fc7fa45ba6..3b07d265c8 100644
--- a/src/java/voldemort/server/protocol/admin/UpdateSlopEntriesRequestHandler.java
+++ b/src/java/voldemort/server/protocol/admin/UpdateSlopEntriesRequestHandler.java
@@ -145,7 +145,8 @@ public StreamRequestHandlerState handleRequest(DataInputStream inputStream,
streamStats.reportStorageTime(Operation.SLOP_UPDATE, System.nanoTime()
- startNs);
if(logger.isTraceEnabled())
- logger.trace("updateSlopEntries (Streaming put) successful");
+ logger.trace("updateSlopEntries (Streaming put) successful on key:" + key
+ + " of store: " + request.getStore());
} catch(ObsoleteVersionException e) {
// log and ignore
if(logger.isDebugEnabled())
diff --git a/src/java/voldemort/server/rebalance/Rebalancer.java b/src/java/voldemort/server/rebalance/Rebalancer.java
index 95c6e8df90..c3cb4f221e 100644
--- a/src/java/voldemort/server/rebalance/Rebalancer.java
+++ b/src/java/voldemort/server/rebalance/Rebalancer.java
@@ -118,63 +118,116 @@ public synchronized void releaseRebalancingPermit(int nodeId) {
*
*
* | swapRO | changeClusterMetadata | changeRebalanceState | Order |
- * | f | t | t | cluster -> rebalance |
+ * | f | t | t | rebalance -> cluster |
* | f | f | t | rebalance |
* | t | t | f | cluster -> swap |
- * | t | t | t | cluster -> swap -> rebalance |
+ * | t | t | t | rebalance -> cluster -> swap |
*
*
* In general we need to do [ rebalance state change -> cluster change ->
* swap ]
*
+ * NOTE: The update of the cluster metadata and the rebalancer state is not
+ * "atomic". Ergo, there could theoretically be a race where a client picks
+ * up new cluster metadata and sends a request based on it, but the proxy
+ * bridges have not been set up yet, so we either miss a proxy put or return
+ * a null for get/getall.
+ *
+ * TODO: refactor. The rollback logic here is too convoluted. Specifically,
+ * the independent updates to each key could be split up into their own
+ * methods.
+ *
* @param cluster Cluster metadata to change
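+ * @param storeDefs List of store definitions to change to, along with the cluster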
* @param rebalancePartitionsInfo List of rebalance partitions info
* @param swapRO Boolean to indicate swapping of RO store
- * @param changeClusterMetadata Boolean to indicate a change of cluster
- * metadata
+ * @param changeClusterAndStoresMetadata Boolean to indicate a change of
+ * cluster and stores metadata
* @param changeRebalanceState Boolean to indicate a change in rebalance
* state
* @param rollback Boolean to indicate that we are rolling back or not
*/
public void rebalanceStateChange(Cluster cluster,
+ List<StoreDefinition> storeDefs,
List<RebalancePartitionsInfo> rebalancePartitionsInfo,
boolean swapRO,
- boolean changeClusterMetadata,
+ boolean changeClusterAndStoresMetadata,
boolean changeRebalanceState,
boolean rollback) {
Cluster currentCluster = metadataStore.getCluster();
+ List<StoreDefinition> currentStoreDefs = metadataStore.getStoreDefList();
- logger.info("Doing rebalance state change with options [ cluster metadata change - "
- + changeClusterMetadata + " ], [ changing rebalancing state - "
+ logger.info("Server doing rebalance state change with options [ cluster metadata change - "
+ + changeClusterAndStoresMetadata + " ], [ changing rebalancing state - "
+ changeRebalanceState + " ], [ changing swapping RO - " + swapRO
+ " ], [ rollback - " + rollback + " ]");
// Variables to track what has completed
List<RebalancePartitionsInfo> completedRebalancePartitionsInfo = Lists.newArrayList();
List<String> swappedStoreNames = Lists.newArrayList();
- boolean completedClusterChange = false;
+ boolean completedClusterAndStoresChange = false;
+ boolean completedRebalanceSourceClusterChange = false;
+ Cluster previousRebalancingSourceCluster = null;
+ List<StoreDefinition> previousRebalancingSourceStores = null;
try {
- // CHANGE CLUSTER METADATA
- if(changeClusterMetadata) {
- changeCluster(cluster);
- completedClusterChange = true;
- }
- // SWAP RO DATA FOR ALL STORES
- if(swapRO) {
- swapROStores(swappedStoreNames, false);
- }
+ /*
+ * Do the rebalancing state changes. It is important that this
+ * happens before the actual cluster metadata is changed. Here's
+ * what could happen otherwise. When a batch completes with
+ * {current_cluster c2, rebalancing_source_cluster c1} and the next
+ * rebalancing state change sets {current_cluster c3,
+ * rebalancing_source_cluster c2} for the next batch, there could be
+ * a window during which the state is {current_cluster c3,
+ * rebalancing_source_cluster c1}. On the other hand, when we update
+ * the rebalancing source cluster first, there is a window where the
+ * state is {current_cluster c2, rebalancing_source_cluster c2},
+ * which is still fine for the following reason. Successful
+ * completion of a batch means the cluster is finalized, so it's okay
+ * to stop proxying based on {current_cluster c2,
+ * rebalancing_source_cluster c1}. And since the cluster metadata has
+ * not yet been updated to c3, the writes will happen based on c2.
+ *
+ *
+ * Even if some clients have already seen the {current_cluster c3,
+ * rebalancing_source_cluster c2} state from other servers, the
+ * operation will be rejected with InvalidMetadataException since
+ * this server itself is not yet aware of c3.
+ */
// CHANGE REBALANCING STATE
if(changeRebalanceState) {
try {
+ previousRebalancingSourceCluster = metadataStore.getRebalancingSourceCluster();
+ previousRebalancingSourceStores = metadataStore.getRebalancingSourceStores();
if(!rollback) {
+
+ // Save up the current cluster and stores def for
+ // Redirecting store
+ changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML,
+ currentCluster,
+ // Save the current store defs
+ // for Redirecting store
+ MetadataStore.REBALANCING_SOURCE_STORES_XML,
+ currentStoreDefs);
+
+ completedRebalanceSourceClusterChange = true;
+
for(RebalancePartitionsInfo info: rebalancePartitionsInfo) {
metadataStore.addRebalancingState(info);
completedRebalancePartitionsInfo.add(info);
}
} else {
+ // Reset the rebalancing source cluster back to null
+
+ changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML, null,
+ // Reset the rebalancing source stores back to null
+ MetadataStore.REBALANCING_SOURCE_STORES_XML,
+ null);
+
+ completedRebalanceSourceClusterChange = true;
+
for(RebalancePartitionsInfo info: rebalancePartitionsInfo) {
metadataStore.deleteRebalancingState(info);
completedRebalancePartitionsInfo.add(info);
@@ -184,17 +237,42 @@ public void rebalanceStateChange(Cluster cluster,
throw new VoldemortException(e);
}
}
+
+ // CHANGE CLUSTER METADATA AND STORE METADATA
+ if(changeClusterAndStoresMetadata) {
+ logger.info("Switching cluster metadata from " + currentCluster + " to " + cluster);
+ logger.info("Switching stores metadata from " + currentStoreDefs + " to "
+ + storeDefs);
+ changeClusterAndStores(MetadataStore.CLUSTER_KEY,
+ cluster,
+ MetadataStore.STORES_KEY,
+ storeDefs);
+
+ completedClusterAndStoresChange = true;
+
+ }
+
+ // SWAP RO DATA FOR ALL STORES
+ if(swapRO) {
+ swapROStores(swappedStoreNames, false);
+ }
+
} catch(VoldemortException e) {
logger.error("Got exception while changing state, now rolling back changes", e);
- // ROLLBACK CLUSTER CHANGE
- if(completedClusterChange) {
+ // ROLLBACK CLUSTER AND STORES CHANGE
+ if(completedClusterAndStoresChange) {
try {
- changeCluster(currentCluster);
+ logger.info("Rolling back cluster.xml to " + currentCluster);
+ logger.info("Rolling back stores.xml to " + currentStoreDefs);
+ changeClusterAndStores(MetadataStore.CLUSTER_KEY,
+ currentCluster,
+ MetadataStore.STORES_KEY,
+ currentStoreDefs);
} catch(Exception exception) {
- logger.error("Error while rolling back cluster metadata to " + currentCluster,
- exception);
+ logger.error("Error while rolling back cluster metadata to " + currentCluster
+ + " Stores metadata to " + currentStoreDefs, exception);
}
}
@@ -209,7 +287,6 @@ public void rebalanceStateChange(Cluster cluster,
// CHANGE BACK ALL REBALANCING STATES FOR COMPLETED ONES
if(completedRebalancePartitionsInfo.size() > 0) {
-
if(!rollback) {
for(RebalancePartitionsInfo info: completedRebalancePartitionsInfo) {
try {
@@ -234,6 +311,19 @@ public void rebalanceStateChange(Cluster cluster,
}
+ // Revert changes to REBALANCING_SOURCE_CLUSTER_XML and
+ // REBALANCING_SOURCE_STORES_XML
+ if(completedRebalanceSourceClusterChange) {
+ logger.info("Reverting the REBALANCING_SOURCE_CLUSTER_XML back to "
+ + previousRebalancingSourceCluster);
+ logger.info("Reverting the REBALANCING_SOURCE_STORES_XML back to "
+ + previousRebalancingSourceStores);
+ changeClusterAndStores(MetadataStore.REBALANCING_SOURCE_CLUSTER_XML,
+ previousRebalancingSourceCluster,
+ MetadataStore.REBALANCING_SOURCE_STORES_XML,
+ previousRebalancingSourceStores);
+ }
+
throw e;
}
@@ -283,29 +373,38 @@ private void swapROStores(List swappedStoreNames, boolean useSwappedStor
}
/**
- * Updates the cluster metadata
+ * Updates the cluster and store metadata atomically
+ *
+ * This is required during rebalance and expansion into a new zone since we
+ * have to update the store def along with the cluster def.
*
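+ * @param clusterKey The metadata key under which the cluster is written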
* @param cluster The cluster metadata information
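+ * @param storesKey The metadata key under which the store definitions are written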
+ * @param storeDefs The stores metadata information
*/
- private void changeCluster(final Cluster cluster) {
+ private void changeClusterAndStores(String clusterKey,
+ final Cluster cluster,
+ String storesKey,
+ final List<StoreDefinition> storeDefs) {
+ metadataStore.writeLock.lock();
try {
- metadataStore.writeLock.lock();
- try {
- VectorClock updatedVectorClock = ((VectorClock) metadataStore.get(MetadataStore.CLUSTER_KEY,
- null)
- .get(0)
- .getVersion()).incremented(0,
- System.currentTimeMillis());
- logger.info("Switching metadata from " + metadataStore.getCluster() + " to "
- + cluster + " [ " + updatedVectorClock + " ]");
- metadataStore.put(MetadataStore.CLUSTER_KEY,
- Versioned.value((Object) cluster, updatedVectorClock));
- } finally {
- metadataStore.writeLock.unlock();
- }
+ VectorClock updatedVectorClock = ((VectorClock) metadataStore.get(clusterKey, null)
+ .get(0)
+ .getVersion()).incremented(metadataStore.getNodeId(),
+ System.currentTimeMillis());
+ metadataStore.put(clusterKey, Versioned.value((Object) cluster, updatedVectorClock));
+
+ // now put new stores
+ updatedVectorClock = ((VectorClock) metadataStore.get(storesKey, null)
+ .get(0)
+ .getVersion()).incremented(metadataStore.getNodeId(),
+ System.currentTimeMillis());
+ metadataStore.put(storesKey, Versioned.value((Object) storeDefs, updatedVectorClock));
+
} catch(Exception e) {
- logger.info("Error while changing cluster to " + cluster);
+ logger.info("Error while changing cluster to " + cluster + "for key " + clusterKey);
throw new VoldemortException(e);
+ } finally {
+ metadataStore.writeLock.unlock();
}
}
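
changeClusterAndStores() above increments each metadata key's vector clock with this node's id and writes both keys while holding the metadata write lock, so concurrent readers never observe the cluster updated without the matching store definitions. A minimal sketch of that read-increment-write-under-lock pattern, using an illustrative VersionedMetadata type rather than Voldemort's MetadataStore and VectorClock:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.concurrent.locks.ReentrantLock;

    final class VersionedMetadata {

        static final class VersionedValue {
            final Object value;
            final long version;
            VersionedValue(Object value, long version) { this.value = value; this.version = version; }
        }

        private final Map<String, VersionedValue> entries = new HashMap<String, VersionedValue>();
        private final ReentrantLock writeLock = new ReentrantLock();

        // Update two keys "together": both puts happen under the same write lock,
        // and each write bumps the version it read, as the patch does with vector clocks.
        void putPair(String keyA, Object valueA, String keyB, Object valueB) {
            writeLock.lock();
            try {
                put(keyA, valueA);
                put(keyB, valueB);
            } finally {
                writeLock.unlock();
            }
        }

        private void put(String key, Object value) {
            VersionedValue current = entries.get(key);
            long next = (current == null ? 0 : current.version) + 1;
            entries.put(key, new VersionedValue(value, next));
        }
    }
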
diff --git a/src/java/voldemort/server/rebalance/RebalancerState.java b/src/java/voldemort/server/rebalance/RebalancerState.java
index 6eb62d2ea7..5aa5588eb1 100644
--- a/src/java/voldemort/server/rebalance/RebalancerState.java
+++ b/src/java/voldemort/server/rebalance/RebalancerState.java
@@ -23,10 +23,10 @@
import java.util.Map;
import voldemort.client.rebalance.RebalancePartitionsInfo;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.serialization.json.JsonReader;
import voldemort.serialization.json.JsonWriter;
import voldemort.store.metadata.MetadataStore;
-import voldemort.utils.StoreInstance;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -103,7 +103,7 @@ public RebalancePartitionsInfo find(String storeName,
// If yes, check if the key belongs to one of the partitions
// being moved
- if(StoreInstance.checkKeyBelongsToPartition(keyPartitions,
+ if(StoreRoutingPlan.checkKeyBelongsToPartition(keyPartitions,
nodePartitions,
info.getReplicaToAddPartitionList(storeName))) {
return info;
diff --git a/src/java/voldemort/server/rebalance/async/DonorBasedRebalanceAsyncOperation.java b/src/java/voldemort/server/rebalance/async/DonorBasedRebalanceAsyncOperation.java
index 3d94150ce7..c29be1396a 100644
--- a/src/java/voldemort/server/rebalance/async/DonorBasedRebalanceAsyncOperation.java
+++ b/src/java/voldemort/server/rebalance/async/DonorBasedRebalanceAsyncOperation.java
@@ -38,6 +38,7 @@
import voldemort.client.protocol.admin.AdminClient;
import voldemort.client.rebalance.RebalancePartitionsInfo;
import voldemort.cluster.Cluster;
+import voldemort.routing.StoreRoutingPlan;
import voldemort.server.StoreRepository;
import voldemort.server.VoldemortConfig;
import voldemort.server.rebalance.Rebalancer;
@@ -51,7 +52,6 @@
import voldemort.utils.ClosableIterator;
import voldemort.utils.Pair;
import voldemort.utils.RebalanceUtils;
-import voldemort.utils.StoreInstance;
import voldemort.versioning.Versioned;
import com.google.common.collect.HashMultimap;
@@ -330,7 +330,7 @@ private void fetchEntriesForStealers(StorageEngine st
while(running.get() && keys.hasNext()) {
ByteArray key = keys.next();
scanned++;
- List<Integer> nodeIds = StoreInstance.checkKeyBelongsToPartition(key.get(),
+ List<Integer> nodeIds = StoreRoutingPlan.checkKeyBelongsToPartition(key.get(),
optimizedStealerNodeToMappingTuples,
targetCluster,
storeDef);
@@ -378,7 +378,7 @@ private void fetchEntriesForStealersPartitionScan(StorageEngine value = entry.getSecond();
scanned++;
- List<Integer> nodeIds = StoreInstance.checkKeyBelongsToPartition(key.get(),
+ List<Integer> nodeIds = StoreRoutingPlan.checkKeyBelongsToPartition(key.get(),
optimizedStealerNodeToMappingTuples,
targetCluster,
storeDef);
diff --git a/src/java/voldemort/server/scheduler/slop/BlockingSlopPusherJob.java b/src/java/voldemort/server/scheduler/slop/BlockingSlopPusherJob.java
index 774fb94b9d..47b1cc5d9f 100644
--- a/src/java/voldemort/server/scheduler/slop/BlockingSlopPusherJob.java
+++ b/src/java/voldemort/server/scheduler/slop/BlockingSlopPusherJob.java
@@ -83,7 +83,7 @@ public BlockingSlopPusherJob(StoreRepository storeRepo,
public void run() {
// don't try to run slop pusher job when rebalancing
- if(metadataStore.getServerState()
+ if(metadataStore.getServerStateUnlocked()
.equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
logger.error("Cannot run slop pusher job since Voldemort server is rebalancing");
return;
diff --git a/src/java/voldemort/server/scheduler/slop/StreamingSlopPusherJob.java b/src/java/voldemort/server/scheduler/slop/StreamingSlopPusherJob.java
index b8cbd1804a..b1d636d6c4 100644
--- a/src/java/voldemort/server/scheduler/slop/StreamingSlopPusherJob.java
+++ b/src/java/voldemort/server/scheduler/slop/StreamingSlopPusherJob.java
@@ -115,7 +115,7 @@ public void run() {
loadMetadata();
// don't try to run slop pusher job when rebalancing
- if(metadataStore.getServerState()
+ if(metadataStore.getServerStateUnlocked()
.equals(MetadataStore.VoldemortState.REBALANCING_MASTER_SERVER)) {
logger.error("Cannot run slop pusher job since Voldemort server is rebalancing");
return;
@@ -196,7 +196,8 @@ public void run() {
if(logger.isTraceEnabled())
logger.trace("Pushing slop for " + versioned.getValue().getNodeId()
- + " and store " + versioned.getValue().getStoreName());
+ + " and store " + versioned.getValue().getStoreName()
+ + " of key: " + versioned.getValue().getKey());
if(failureDetector.isAvailable(node)) {
SynchronousQueue<Versioned<Slop>> slopQueue = slopQueues.get(nodeId);
diff --git a/src/java/voldemort/server/storage/RepairJob.java b/src/java/voldemort/server/storage/RepairJob.java
index 6544593729..00a0ea919a 100644
--- a/src/java/voldemort/server/storage/RepairJob.java
+++ b/src/java/voldemort/server/storage/RepairJob.java
@@ -64,7 +64,8 @@ public void startRepairJob() {
public void run() {
// don't try to run the repair job when the server is not in normal state
- if(!metadataStore.getServerState().equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
+ if(!metadataStore.getServerStateUnlocked()
+ .equals(MetadataStore.VoldemortState.NORMAL_SERVER)) {
logger.error("Cannot run repair job since Voldemort server is not in normal state");
return;
}
@@ -97,8 +98,7 @@ public void run() {
long repairSlops = 0L;
long numDeletedKeys = 0;
while(iterator.hasNext()) {
- Pair<ByteArray, Versioned<byte[]>> keyAndVal;
- keyAndVal = iterator.next();
+ Pair<ByteArray, Versioned<byte[]>> keyAndVal = iterator.next();
List<Node> nodes = routingStrategy.routeRequest(keyAndVal.getFirst().get());
if(!hasDestination(nodes)) {
@@ -111,7 +111,8 @@ public void run() {
}
closeIterator(iterator);
localStats.put(storeDef.getName(), repairSlops);
- logger.info("Completed store " + storeDef.getName());
+ logger.info("Completed store " + storeDef.getName() + " #Scanned:"
+ + progress.get() + " #Deleted:" + numDeletedKeys);
}
}
} catch(Exception e) {
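
The RepairJob loop above routes every locally stored key through the store's routing strategy and deletes the keys for which this node is no longer a destination. A condensed sketch of that orphaned-key cleanup, assuming hypothetical KeyRouter and LocalStore interfaces in place of Voldemort's RoutingStrategy and StorageEngine:

    import java.util.Iterator;
    import java.util.List;

    final class OrphanedKeyRepair {

        interface KeyRouter {
            List<Integer> route(byte[] key); // node ids responsible for the key
        }

        interface LocalStore {
            Iterator<byte[]> keys();
            void delete(byte[] key);
        }

        static long repair(LocalStore store, KeyRouter router, int localNodeId) {
            long deleted = 0;
            Iterator<byte[]> it = store.keys();
            while(it.hasNext()) {
                byte[] key = it.next();
                if(!router.route(key).contains(localNodeId)) {
                    store.delete(key); // key is orphaned on this node
                    deleted++;
                }
            }
            return deleted;
        }
    }
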
diff --git a/src/java/voldemort/server/storage/StorageService.java b/src/java/voldemort/server/storage/StorageService.java
index 0fe606b502..7fb0e076c9 100644
--- a/src/java/voldemort/server/storage/StorageService.java
+++ b/src/java/voldemort/server/storage/StorageService.java
@@ -77,6 +77,7 @@
import voldemort.store.nonblockingstore.NonblockingStore;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageEngine;
+import voldemort.store.rebalancing.ProxyPutStats;
import voldemort.store.rebalancing.RebootstrappingStore;
import voldemort.store.rebalancing.RedirectingStore;
import voldemort.store.retention.RetentionEnforcingStore;
@@ -96,6 +97,7 @@
import voldemort.utils.ByteArray;
import voldemort.utils.ClosableIterator;
import voldemort.utils.ConfigurationException;
+import voldemort.utils.DaemonThreadFactory;
import voldemort.utils.DynamicThrottleLimit;
import voldemort.utils.EventThrottler;
import voldemort.utils.JmxUtils;
@@ -137,6 +139,8 @@ public class StorageService extends AbstractService {
private final FailureDetector failureDetector;
private final StoreStats storeStats;
private final RoutedStoreFactory routedStoreFactory;
+ private final ExecutorService proxyPutWorkerPool;
+ private final ProxyPutStats aggregatedProxyPutStats;
public StorageService(StoreRepository storeRepository,
MetadataStore metadata,
@@ -179,6 +183,17 @@ public StorageService(StoreRepository storeRepository,
this.dynThrottleLimit = new DynamicThrottleLimit(rate);
} else
this.dynThrottleLimit = null;
+
+ // create the proxy put thread pool
+ this.proxyPutWorkerPool = Executors.newFixedThreadPool(config.getMaxProxyPutThreads(),
+ new DaemonThreadFactory("voldemort-proxy-put-thread"));
+ this.aggregatedProxyPutStats = new ProxyPutStats(null);
+ if(config.isJmxEnabled()) {
+ JmxUtils.registerMbean(this.aggregatedProxyPutStats,
+ JmxUtils.createObjectName("voldemort.store.rebalancing",
+ "aggregate-proxy-puts"));
+ }
+
}
private void initStorageConfig(String configClassName) {
@@ -752,11 +767,21 @@ public void registerEngine(StorageEngine engine,
}
if(voldemortConfig.isEnableRebalanceService()) {
+ ProxyPutStats proxyPutStats = new ProxyPutStats(aggregatedProxyPutStats);
+ if(voldemortConfig.isJmxEnabled()) {
+ JmxUtils.registerMbean(proxyPutStats,
+ JmxUtils.createObjectName("voldemort.store.rebalancing",
+ engine.getName()
+ + "-proxy-puts"));
+ }
store = new RedirectingStore(store,
metadata,
storeRepository,
failureDetector,
- storeFactory);
+ storeFactory,
+ voldemortConfig.getProxyPutsDuringRebalance(),
+ proxyPutWorkerPool,
+ proxyPutStats);
if(voldemortConfig.isJmxEnabled()) {
MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
ObjectName name = null;
@@ -999,6 +1024,8 @@ protected void stopInner() {
logger.info("Closed client threadpool.");
+ storeFactory.close();
+
if(this.failureDetector != null) {
try {
this.failureDetector.destroy();
@@ -1006,9 +1033,18 @@ protected void stopInner() {
lastException = e;
}
}
-
logger.info("Closed failure detector.");
+ // shut down the proxy put thread pool
+ this.proxyPutWorkerPool.shutdown();
+ try {
+ if(!this.proxyPutWorkerPool.awaitTermination(10, TimeUnit.SECONDS))
+ this.proxyPutWorkerPool.shutdownNow();
+ } catch(InterruptedException e) {
+ this.proxyPutWorkerPool.shutdownNow();
+ }
+ logger.info("Closed proxy put thread pool.");
+
/* If there is an exception, throw it */
if(lastException instanceof VoldemortException)
throw (VoldemortException) lastException;
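
stopInner() above shuts the proxy put worker pool down with the standard two-phase idiom: shutdown() to stop accepting work, a bounded awaitTermination(), and shutdownNow() as the fallback. A standalone sketch of that sequence (the pool size and the 10 second timeout are only examples):

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    final class GracefulShutdownExample {

        public static void main(String[] args) {
            ExecutorService pool = Executors.newFixedThreadPool(4);
            // ... submit proxy put tasks ...
            pool.shutdown(); // no new tasks accepted, queued tasks still run
            try {
                if(!pool.awaitTermination(10, TimeUnit.SECONDS)) {
                    pool.shutdownNow(); // interrupt anything still running
                }
            } catch(InterruptedException e) {
                pool.shutdownNow();
                Thread.currentThread().interrupt(); // preserve the interrupt status
            }
        }
    }
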
diff --git a/src/java/voldemort/store/AbstractStorageEngine.java b/src/java/voldemort/store/AbstractStorageEngine.java
index 16819c8507..4ba9aefcf9 100644
--- a/src/java/voldemort/store/AbstractStorageEngine.java
+++ b/src/java/voldemort/store/AbstractStorageEngine.java
@@ -1,7 +1,12 @@
package voldemort.store;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
import voldemort.utils.ClosableIterator;
import voldemort.utils.Pair;
+import voldemort.versioning.Occurred;
import voldemort.versioning.Versioned;
public class AbstractStorageEngine<K, V, T> extends AbstractStore<K, V, T> implements StorageEngine<K, V, T> {
@@ -49,9 +54,54 @@ public boolean beginBatchModifications() {
return false;
}
+ @Override
+ public List