Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,18 @@ public class ServerConstants {
*/
public static final Integer WIRE_VERSION = 3;

/**
 * version (8) reflects changes to RFile index (ACCUMULO-1124) in version 1.8.0
 */
public static final int SHORTEN_RFILE_KEYS = 8;
/**
 * version (7) also reflects the addition of a replication table
 */
public static final int MOVE_TO_REPLICATION_TABLE = 7;
/**
 * this is the current data version; bump to the newest version constant above whenever the
 * persistent on-disk format changes
 */
public static final int DATA_VERSION = SHORTEN_RFILE_KEYS;
/**
* version (6) reflects the addition of a separate root table (ACCUMULO-1481) in version 1.6.0
*/
Expand All @@ -68,7 +72,7 @@ public class ServerConstants {
public static final int LOGGING_TO_HDFS = 4;
public static final BitSet CAN_UPGRADE = new BitSet();
static {
// Persistent-format versions that this release can upgrade from. MOVE_TO_REPLICATION_TABLE
// (version 7, i.e. 1.7.x) is included so a direct 1.7 -> 1.8 upgrade is permitted.
for (int i : new int[] {DATA_VERSION, MOVE_TO_REPLICATION_TABLE, MOVE_TO_ROOT_TABLE, MOVE_DELETE_MARKERS, LOGGING_TO_HDFS}) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added 1.7 to the set of releases we can upgrade from. Probably need to remove some (like every one that is not tested). I think we should test the upgrade from 1.6 to 1.8 and remove all other releases. Thoughts? I can take a whack at testing the 1.6 to 1.8 upgrade, unless someone else wants to do it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1.6 and 1.7 to 1.8 sound like they should both work to me.

My gut reaction is to not worry about 1.5, but is there any fundamental problem in supporting that (is it just a testing burden)? If we don't support a direct upgrade from 1.5, what's the fail-safe? Could users just bulk-import the old files into a new instance?

Copy link
Copy Markdown
Contributor

@keith-turner keith-turner Jul 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My gut reaction is to not worry about 1.5, but is there any fundamental problem in supporting that (is it just a testing burden)?

Yeah, it's just a matter of testing it. I am not completely sure, but it seems code may be in place to support that upgrade. I don't know if it works. Personally, I would not want to support it without testing it.

If we don't support a direct upgrade from 1.5, what's the fail-safe?

Could add support for it in 1.8.1 or later if someone really wants that functionality.

Could users just bulk-import the old files into a new instance?

That's tricky; if done incorrectly, it can resurrect old and/or deleted data.

Also before making any definitive decisions about 1.5 and earlier, we should see if the 1.6 to 1.8 upgrade uncovers any interesting issues.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed on all of your points. Want to break out things to test in a JIRA or something? I can try to help.

CAN_UPGRADE.set(i);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.util.Pair;
Expand All @@ -34,9 +35,12 @@
import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
import org.apache.hadoop.fs.Path;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.util.concurrent.Uninterruptibles;

/*
* This class governs the space in Zookeeper that advertises the status of Write-Ahead Logs
* in use by tablet servers and the replication machinery.
Expand Down Expand Up @@ -98,10 +102,16 @@ private String root() {
// Tablet server exists
public void initWalMarker(TServerInstance tsi) throws WalMarkerException {
byte[] data = new byte[0];
try {
zoo.putPersistentData(root() + "/" + tsi.toString(), data, NodeExistsPolicy.FAIL);
} catch (KeeperException | InterruptedException e) {
throw new WalMarkerException(e);
while (true) {
try {
zoo.putPersistentData(root() + "/" + tsi.toString(), data, NodeExistsPolicy.FAIL);
break;
} catch (NoNodeException e) {
log.info("WAL parent node does not exist (upgrade may be in progress) : " + e.getMessage());
Copy link
Copy Markdown
Contributor

@keith-turner keith-turner Jul 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ShawnWalker made a very nice suggestion in irc. Create the WALs node instead of waiting. This breaks this invisible dependency on code in the master.

Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
} catch (KeeperException | InterruptedException e) {
throw new WalMarkerException(e);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,10 @@ private void upgradeZookeeper() {

// add the currlog location for root tablet current logs
zoo.putPersistentData(ZooUtil.getRoot(getInstance()) + RootTable.ZROOT_TABLET_CURRENT_LOGS, new byte[0], NodeExistsPolicy.SKIP);

// create tablet server wal logs node in ZK
zoo.putPersistentData(ZooUtil.getRoot(getInstance()) + WalStateManager.ZWALS, new byte[0], NodeExistsPolicy.SKIP);

haveUpgradedZooKeeper = true;
} catch (Exception ex) {
// ACCUMULO-3651 Changed level to error and added FATAL to message for slf4j compatibility
Expand Down
25 changes: 13 additions & 12 deletions test/system/upgrade_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,22 @@ fi

#TODO could support multinode configs, this script assumes single node config

# Release to upgrade FROM (1.7.1) and the current build under test.
PREV=../../../accumulo-1.7.1
CURR=../../
DIR=/accumulo
BULK=/tmp/upt
INSTANCE=testUp

# Kill any running Accumulo processes and clear prior test state from HDFS.
pkill -f accumulo.start
hadoop fs -rmr "$DIR"
hadoop fs -rmr "$BULK"
# -p creates missing parent directories for the bulk-import failure dir
hadoop fs -mkdir -p "$BULK/fail"

# Initialize and start the OLD release.
"$PREV/bin/accumulo" init --clear-instance-name --instance-name $INSTANCE --password secret
"$PREV/bin/start-all.sh"

# Ingest with the old release: 200k rows live, plus 200k more written to an
# rfile for bulk import below.
"$PREV/bin/accumulo" org.apache.accumulo.test.TestIngest -i $INSTANCE -u root -p secret --timestamp 1 --size 50 --random 56 --rows 200000 --start 0 --cols 1 --createTable --splits 10
"$PREV/bin/accumulo" org.apache.accumulo.test.TestIngest -i $INSTANCE -u root -p secret --rfile $BULK/bulk/test --timestamp 1 --size 50 --random 56 --rows 200000 --start 200000 --cols 1

echo -e "table test_ingest\nimportdirectory $BULK/bulk $BULK/fail false" | $PREV/bin/accumulo shell -u root -p secret
if [[ $1 == dirty ]]; then
Expand All @@ -54,23 +55,23 @@ fi
echo "==== Starting Current ==="

# Start the CURRENT release against the old instance; this triggers the upgrade.
# Verify all 400k rows (live + bulk-imported) survived, before and after a compaction.
"$CURR/bin/start-all.sh"
"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 1 --random 56 --rows 400000 --start 0 --cols 1 -i $INSTANCE -u root -p secret
echo "compact -t test_ingest -w" | $CURR/bin/accumulo shell -u root -p secret
"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 1 --random 56 --rows 400000 --start 0 --cols 1 -i $INSTANCE -u root -p secret


# Overwrite with a fresh ingest at a later timestamp and verify, again before
# and after a compaction.
"$CURR/bin/accumulo" org.apache.accumulo.test.TestIngest --timestamp 2 --size 50 --random 57 --rows 500000 --start 0 --cols 1 -i $INSTANCE -u root -p secret
"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 2 --random 57 --rows 500000 --start 0 --cols 1 -i $INSTANCE -u root -p secret
echo "compact -t test_ingest -w" | $CURR/bin/accumulo shell -u root -p secret
"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 2 --random 57 --rows 500000 --start 0 --cols 1 -i $INSTANCE -u root -p secret

# Clean restart: data must still verify after an orderly stop/start.
"$CURR/bin/stop-all.sh"
"$CURR/bin/start-all.sh"

"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 2 --random 57 --rows 500000 --start 0 --cols 1 -i $INSTANCE -u root -p secret

# Crash recovery: kill -9 the servers and verify again after WAL recovery.
pkill -9 -f accumulo.start
"$CURR/bin/start-all.sh"

"$CURR/bin/accumulo" org.apache.accumulo.test.VerifyIngest --size 50 --timestamp 2 --random 57 --rows 500000 --start 0 --cols 1 -i $INSTANCE -u root -p secret