Skip to content

Commit

Permalink
Merge remote-tracking branch 'voldemort/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
ctasada committed Mar 31, 2013
2 parents a235956 + 4964e7d commit 90c0b5f
Show file tree
Hide file tree
Showing 318 changed files with 28,599 additions and 7,304 deletions.
15 changes: 11 additions & 4 deletions .classpath
Expand Up @@ -9,17 +9,20 @@
<classpathentry kind="src" path="test/unit"/>
<classpathentry kind="src" path="test/integration"/>
<classpathentry kind="src" path="test/common"/>
<classpathentry kind="src" path="test/long"/>
<classpathentry kind="src" path="example/java"/>
<classpathentry kind="src" path="contrib/krati/src/java"/>
<classpathentry kind="src" path="contrib/krati/test"/>
<classpathentry kind="src" path="contrib/collections/src/java"/>
<classpathentry kind="src" path="contrib/collections/test"/>
<classpathentry kind="src" path="contrib/restclient/src/java"/>
<classpathentry kind="lib" path="lib/catalina-ant.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.3.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-dbcp-1.2.2.jar"/>
<classpathentry kind="lib" path="lib/colt-1.2.0.jar"/>
<classpathentry kind="lib" path="contrib/hadoop-store-builder/lib/commons-cli-2.0-SNAPSHOT.jar"/>
<classpathentry kind="lib" path="contrib/hadoop-store-builder/lib/hadoop-0.20.2-core.jar"/>
<classpathentry kind="lib" path="contrib/hadoop-store-builder/lib/commons-configuration-1.6.jar"/>
<classpathentry kind="lib" path="contrib/hadoop-store-builder/lib/hadoop-core-1.0.4-p2.jar"/>
<classpathentry kind="lib" path="lib/junit-4.6.jar"/>
<classpathentry kind="lib" path="lib/log4j-1.2.15.jar"/>
<classpathentry kind="lib" path="lib/jetty-6.1.18.jar"/>
Expand All @@ -40,7 +43,7 @@
<classpathentry kind="lib" path="lib/protobuf-java-2.3.0.jar"/>
<classpathentry kind="lib" path="contrib/ec2-testing/lib/typica.jar"/>
<classpathentry kind="lib" path="lib/google-collect-1.0.jar"/>
<classpathentry kind="lib" path="lib/je-4.0.92.jar"/>
<classpathentry kind="lib" path="lib/je-4.1.17.jar"/>
<classpathentry kind="lib" path="lib/paranamer-2.1.jar"/>
<classpathentry kind="lib" path="lib/jackson-mapper-asl-1.4.0.jar"/>
<classpathentry kind="lib" path="lib/jackson-core-asl-1.4.0.jar"/>
Expand All @@ -53,9 +56,13 @@
<classpathentry kind="lib" path="lib/snappy-0.2.jar"/>
<classpathentry kind="lib" path="lib/httpclient-4.1.2.jar"/>
<classpathentry kind="lib" path="lib/httpcore-4.1.2.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="lib" path="lib/netty-3.5.8.Final.jar"/>
<classpathentry kind="lib" path="lib/joda-time-1.6.jar"/>
<classpathentry kind="lib" path="lib/mail-1.4.1.jar"/>
<classpathentry kind="lib" path="lib/azkaban-common-0.05.jar"/>
<classpathentry kind="lib" path="contrib/restclient/lib/data-1.5.10.jar"/>
<classpathentry kind="lib" path="contrib/restclient/lib/pegasus-common-1.5.10.jar"/>
<classpathentry kind="lib" path="contrib/restclient/lib/r2-1.5.10.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="output" path="classes"/>
</classpath>
2 changes: 1 addition & 1 deletion .settings/org.eclipse.jdt.ui.prefs
@@ -1,4 +1,4 @@
#Tue Jan 13 14:27:58 PST 2009
#Sat Sep 22 05:05:45 PDT 2012
cleanup.add_default_serial_version_id=true
cleanup.add_generated_serial_version_id=false
cleanup.add_missing_annotations=true
Expand Down
3 changes: 2 additions & 1 deletion META-INF/MANIFEST.MF
@@ -1,7 +1,8 @@
Manifest-Version: 1.0
Ant-Version: Apache Ant 1.7.1
Created-By: 20.2-b06 (Sun Microsystems Inc.)
Voldemort-Implementation-Version: 1.3.0
Implementation-Title: Voldemort
Implementation-Version: 0.96
Implementation-Version: 1.3.0
Implementation-Vendor: LinkedIn

110 changes: 110 additions & 0 deletions bin/PREUPGRADE_FOR_1_1_X_README
@@ -0,0 +1,110 @@
This directory contains a utility to convert BDB JE data between different versions of Voldemort.

Need for Conversion
-------------------
Voldemort has been using "sorted duplicates" feature of BDB JE to handle
conflicting writes to the same key. At the very minimum, the conversion gets
rid of BDB sorted duplicates support and handles duplicates in the Voldemort
storage layer itself. The decision was made after months of closely working
with Oracle JE team, to understand the factors affecting performance.

Data Formats
------------
This section describes the data formats themselves.

1) Base Format (Base)
---------------------
This is the format used by Voldemort up until 1.1.x, relying on BDB JE for
duplicate handling

Disadvantages:
-- The manner in which BDB JE handles duplicates is not suitable for an
application with only a small percentage of 2-3 duplicates, i.e. Voldemort.
-- Data bloat issue that prevented us from migrating to any higher 4.x version
to be able to control cache eviction
-- Incompatible with how duplicates are handled in JE5.
-- May incur additional locking costs for the "duplicates" subtree

2) New duplicate format (NewDup)
--------------------------------
This format is supported from release 1.1.x, where Voldemort storage layer
handles duplicates and BDB JE version is bumped up to JE 4.1.17

Advantages:
-- Ability to move data off disk. This is very GC friendly, relying on OS page
cache for the data and using the JVM heap only for index. This is achieved
by setting "bdb.cache.evictln" server parameter to "true"
-- Ability to evict data brought into the cache during scans, minimize impact
on online traffic (Restore, Rebalance, Retention). This is achieved by
setting "bdb.minimize.scan.impact" to "true"
-- Thinner storage layer. eg: BdbStorageEngine.put() does not incur the cost
of an additional delete()
-- General speed up due to elimination of duplicates

This format is the minimum requirement to be able to upgrade to 1.1.x & higher

3) Partition Scan format (PidScan)
----------------------------------
This is a superset of the 'NewDup' format, supported from 1.1.x upwards. In
addition to eliminating duplicates and upgrading to JE 4.1.17, it adds a 2 byte
prefix representing the partition id to each key.

Advantages:
-- Speed up Restore and Rebalancing linearly to the number of partitions
actually fetched. (which means much shorter degraded mode performance)

This is an optional format. You can turn it off by setting
bdb.prefix.keys.with.partitionid=false if you do not want it for some reason.

Note : We have not seen the extra 2 bytes cause any overhead to online
performance

IMPORTANT: IT IS REQUIRED TO CONVERT TO EITHER 'NewDup' or 'PidScan' TO RUN
VOLDEMORT WITH BDB, STARTING RELEASE 1.1.x

Running the Conversion Utility
------------------------------
The tool provides the ability to convert one database from a source environment
to a destination environment. You need to run the tool for each of the databases
(Voldemort stores) you have. You can bring down one Voldemort server at a time,
perform the conversion, and then bring it back up on the appropriate release.

Note: For users running with "bdb.one.env.per.store=false", this means you will
have to run the tool with the same --src and --dest options once for each
database contained in that environment.

In addition to BDB environment locations, the tool needs the cluster.xml to generate
the partition prefix.

$./voldemort-convert-bdb.sh --src <Required: Path to source bdb environment>
--dest <Required: Path to place converted new environment>
--store <Required: BDB database (voldemort store) name>
--cluster-xml <Required: Path to cluster.xml>
--from-format <Required: Format to convert FROM, one of the 3
strings 'Base','NewDup','PidScan'>
--to-format <Required: Format to convert TO, one of the 3
strings 'Base','NewDup','PidScan'>
--je-log-size <Optional: Size in MB for the new JE log files,
Default:60>
--btree-nodemax <Optional: Btree fan out, Default: 512>

We recommend you run the following to move to release 1.1.x & up.

$./voldemort-convert-bdb.sh --src /path/to/src/env
--dest /path/to/dest/env
--store teststore
--cluster-xml /path/to/cluster/xml
--from-format Base
--to-format PidScan












13 changes: 11 additions & 2 deletions bin/generate_cluster_xml.py
Expand Up @@ -10,6 +10,8 @@
# Setup and argument parser
parser = argparse.ArgumentParser(description='Build a voldemort cluster.xml.')
# Add supported arguments
parser.add_argument('-f', '--file', type=str, dest='file',
help='the file of the list of hosts(one per line)')
parser.add_argument('-N', '--name', type=str, default='voldemort', dest='name',
help='the name you want to give the cluster')
parser.add_argument('-n', '--nodes', type=int, default=2, dest='nodes',
Expand Down Expand Up @@ -44,7 +46,11 @@
sys.exit(1)

# Store arguments
nodes = args.nodes
if args.file:
hostList = open(args.file).readlines()
nodes = len(hostList)
else:
nodes = args.nodes
partitions = args.partitions
name = args.name
http_port = args.http_port
Expand Down Expand Up @@ -73,7 +79,10 @@

print " <server>"
print " <id>%d</id>" % i
print " <host>host%d</host>" % i
if args.file:
print " <host>%s</host>" % hostList[i].strip()
else:
print " <host>host%d</host>" % i
print " <http-port>%d</http-port>" % http_port
print " <socket-port>%d</socket-port>" % sock_port
print " <admin-port>%d</admin-port>" % admin_port
Expand Down
46 changes: 46 additions & 0 deletions bin/run-class.bat
@@ -0,0 +1,46 @@
@echo off

REM
REM Copyright 2013 Carlos Tasada
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM ** This Windows BAT file is not tested with each Voldemort release. **

REM Runs a Java main class with the Voldemort jars on the classpath.
REM
REM Usage: run-class.bat java-class-name [options]
REM
REM   java-class-name  fully qualified class to launch (first argument)
REM   options          forwarded untouched to that class
REM
REM VOLD_OPTS may be set by the caller to override the default JVM options.

REM Keep BASE_DIR / CLASSPATH / VOLD_OPTS changes local to this script so they
REM do not leak into (or accumulate in) the calling shell.
setlocal

REM At least the class name is required.
if not "%~1" == "" goto :continue
echo %0 java-class-name [options]
exit /b 1
:continue

REM %~dp0 is the directory holding this script (bin\); its parent is the root.
set "BASE_DIR=%~dp0.."
set CLASSPATH=.

for %%j in ("%BASE_DIR%\dist\*.jar") do (call :append_classpath "%%j")
for %%j in ("%BASE_DIR%\lib\*.jar") do (call :append_classpath "%%j")
set CLASSPATH=%CLASSPATH%;"%BASE_DIR%\dist\resources"

REM The first argument is the class name, not a Voldemort home directory, so
REM the log4j configuration is located relative to BASE_DIR (same file that
REM run-class.sh uses). Backslashes are turned into slashes to form a file URL.
set "LOG4J_CONFIG=file:///%BASE_DIR:\=/%/src/java/log4j.properties"

if "%VOLD_OPTS%" == "" set "VOLD_OPTS=-Xmx2G -server -Dcom.sun.management.jmxremote"
java "-Dlog4j.configuration=%LOG4J_CONFIG%" %VOLD_OPTS% -cp %CLASSPATH% %*

REM Hand the JVM exit code back to the caller; exit /b also ends the setlocal scope.
exit /b %ERRORLEVEL%

REM Subroutine: appends its (already quoted) argument to CLASSPATH.
:append_classpath
set CLASSPATH=%CLASSPATH%;%1
goto :eof
12 changes: 9 additions & 3 deletions bin/run-class.sh
@@ -1,7 +1,7 @@
#!/bin/bash

#
# Copyright 2008-2009 LinkedIn, Inc
# Copyright 2008-2013 LinkedIn, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,10 @@ if [ $# -lt 1 ]; then
exit 1
fi

base_dir=$(dirname $0)/..
script_path=$(readlink -f "$0")
script_dir=`dirname "$script_path"`

base_dir=`dirname "$script_dir"`

for file in $base_dir/lib/*.jar;
do
Expand All @@ -43,5 +46,8 @@ if [ -z "$VOLD_OPTS" ]; then
VOLD_OPTS="-Xmx2G -server -Dcom.sun.management.jmxremote "
fi

# add '-Dlog4j.debug ' to debug log4j issues.
LOG4JPROPERTIES="-Dlog4j.configuration=file:///${base_dir}/src/java/log4j.properties"

export CLASSPATH
java -Dlog4j.configuration=$base_dir/src/java/log4j.properties $VOLD_OPTS -cp $CLASSPATH $@
java $LOG4JPROPERTIES $VOLD_OPTS -cp $CLASSPATH $@
22 changes: 22 additions & 0 deletions bin/voldemort-admin-tool.bat
@@ -0,0 +1,22 @@
@echo off

REM
REM Copyright 2013 Carlos Tasada
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM ** This Windows BAT file is not tested with each Voldemort release. **

REM Wrapper that launches voldemort.VoldemortAdminTool through run-class.bat,
REM forwarding every command-line argument unchanged.

REM %~dp0 expands to this script's own directory; BASE_DIR is its parent.
set "BASE_DIR=%~dp0.."

call "%BASE_DIR%/bin/run-class.bat" voldemort.VoldemortAdminTool %*
39 changes: 39 additions & 0 deletions bin/voldemort-convert-bdb.sh
@@ -0,0 +1,39 @@
#!/bin/bash
#
# Copyright 2008-2012 LinkedIn, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Runs voldemort.store.bdb.dataconversion.BdbConvertData with every jar found
# under dist/, lib/ and contrib/*/lib/ on the classpath. All command-line
# arguments are forwarded to the Java class unchanged.

# Resolve the install root to an absolute path so that the classpath and the
# log4j configuration are found no matter which directory the script is
# started from (e.g. when invoked as ./voldemort-convert-bdb.sh inside bin/).
base_dir=$(cd "$(dirname "$0")/.." && pwd) || exit 1

# Same search order as before: dist jars, then lib jars, then contrib jars.
for file in "$base_dir"/dist/*.jar "$base_dir"/lib/*.jar "$base_dir"/contrib/*/lib/*.jar;
do
  CLASSPATH=$CLASSPATH:$file
done

CLASSPATH=$CLASSPATH:$base_dir/dist/resources

JVM_OPTS="-server -Xms5g -Xmx5g -XX:NewSize=1024m -XX:MaxNewSize=1024m -XX:+AlwaysPreTouch -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:CMSInitiatingOccupancyFraction=70 -XX:SurvivorRatio=2"

# base_dir is absolute (leading '/'), so this yields a file:///... URL.
LOG4JPROPERTIES="-Dlog4j.configuration=file://${base_dir}/src/java/log4j.properties"

# JVM_OPTS is intentionally left unquoted so it splits into separate options;
# "$@" is quoted so arguments containing spaces (such as paths) stay intact.
java "$LOG4JPROPERTIES" $JVM_OPTS -cp "$CLASSPATH" voldemort.store.bdb.dataconversion.BdbConvertData "$@"
22 changes: 22 additions & 0 deletions bin/voldemort-performance-tool.bat
@@ -0,0 +1,22 @@
@echo off

REM
REM Copyright 2013 Carlos Tasada
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM ** This Windows BAT file is not tested with each Voldemort release. **

REM Wrapper that launches voldemort.performance.benchmark.Benchmark through
REM run-class.bat, forwarding every command-line argument unchanged.

REM %~dp0 expands to this script's own directory; BASE_DIR is its parent.
set "BASE_DIR=%~dp0.."

call "%BASE_DIR%/bin/run-class.bat" voldemort.performance.benchmark.Benchmark %*
22 changes: 22 additions & 0 deletions bin/voldemort-rebalance.bat
@@ -0,0 +1,22 @@
@echo off

REM
REM Copyright 2013 Carlos Tasada
REM
REM Licensed under the Apache License, Version 2.0 (the "License");
REM you may not use this file except in compliance with the License.
REM You may obtain a copy of the License at
REM
REM http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing, software
REM distributed under the License is distributed on an "AS IS" BASIS,
REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
REM See the License for the specific language governing permissions and
REM limitations under the License.
REM
REM ** This Windows BAT file is not tested with each Voldemort release. **

REM Wrapper that launches voldemort.client.rebalance.RebalanceCLI through
REM run-class.bat, forwarding every command-line argument unchanged.

REM %~dp0 expands to this script's own directory; BASE_DIR is its parent.
set "BASE_DIR=%~dp0.."

call "%BASE_DIR%/bin/run-class.bat" voldemort.client.rebalance.RebalanceCLI %*

0 comments on commit 90c0b5f

Please sign in to comment.