Skip to content

Commit

Permalink
updated configs and docs to reflect new config file
Browse files Browse the repository at this point in the history
  • Loading branch information
alexholmes committed Jan 25, 2012
1 parent 9961208 commit bf85be9
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 189 deletions.
270 changes: 96 additions & 174 deletions README.md

Large diffs are not rendered by default.

17 changes: 6 additions & 11 deletions TESTING.md
Expand Up @@ -14,23 +14,18 @@ $ sudo dd bs=1048576 count=1 skip=0 if=/dev/sda of=/tmp/slurper-test/in/random-f
$ md5sum /tmp/slurper-test/in/random-file
969249981fa294b1273b91ec4dc3d34b /tmp/slurper-test/in/random-file
</code></pre>
3. Run the HDFS Slurper in standalone mode.
<pre><code>export HADOOP_HOME=/usr/lib/hadoop
3. Edit `conf/slurper-env.sh` and set your JAVA_HOME and HADOOP_HOME settings.
4. Run the HDFS Slurper in standalone mode.
<pre><code>
bin/slurper.sh \
--datasource-name test \
--src-dir file:/tmp/slurper-test/in \
--dest-dir hdfs:/tmp/slurper-test/dest \
--dest-staging-dir hdfs:/tmp/slurper-test/staging \
--work-dir file:/tmp/slurper-test/work \
--complete-dir file:/tmp/slurper-test/complete \
--error-dir file:/tmp/slurper-test/error
--config-file /path/to/slurper/conf/examples/test.conf
</code></pre>
4. Verify that the file was copied into HDFS
5. Verify that the file was copied into HDFS
<pre><code>$ fs -ls /tmp/slurper-test/dest/random-file
Found 1 items
-rw-r--r-- 1 user group 1048576 2012-01-17 21:09 /tmp/slurper-test/dest/random-file
</code></pre>
5. Get the MD5 hash of the file in HDFS and verify it's the same as the original MD5 in step 2
6. Get the MD5 hash of the file in HDFS and verify it's the same as the original MD5 in step 2
<pre><code>$ fs -cat /tmp/slurper-test/dest/random-file | md5sum
969249981fa294b1273b91ec4dc3d34b -
</code></pre>
7 changes: 7 additions & 0 deletions src/main/config/examples/basic.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
DEST_DIR = hdfs:/incoming
7 changes: 7 additions & 0 deletions src/main/config/examples/dynamic-dest.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
SCRIPT = /tmp/hdfs-file-slurper/src/main/python/sample-python.py
10 changes: 10 additions & 0 deletions src/main/config/examples/lzop-verify.conf
@@ -0,0 +1,10 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
DEST_DIR = hdfs:/incoming
COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec
CREATE_LZO_INDEX = true
VERIFY = true
7 changes: 7 additions & 0 deletions src/main/config/examples/test.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper-test/in
WORK_DIR = file:/tmp/slurper-test/work
COMPLETE_DIR = file:/tmp/slurper-test/complete
ERROR_DIR = file:/tmp/slurper-test/error
DEST_STAGING_DIR = hdfs:/tmp/slurper-test/staging
DEST_DIR = hdfs:/tmp/slurper-test/dest
9 changes: 5 additions & 4 deletions src/main/config/example.conf → src/main/config/slurper.conf
@@ -1,14 +1,14 @@
#############################################################################
#
# A configuration file which can be used with the
# "__config_file" option.
# "--config-file" option.
#
#############################################################################

# A name used for the PID file, as well as the log filename, to support
# multiple Slurper instances working from the same installation directory.
#
DATASOURCE_NAME = alex
DATASOURCE_NAME = test

# The source directory. This must be a fully-qualified URI.
#
Expand Down Expand Up @@ -45,11 +45,12 @@ DEST_DIR = hdfs:/tmp/slurper/dest

# The compression codec which should be used to compress the output.
#
COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec
# COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec


# If the destination file is LZOP, this option will create an index file.
#
CREATE_LZO_INDEX = true
# CREATE_LZO_INDEX = true

# Reads the destination file after the copy has completed and verifies
# its integrity.
Expand Down

0 comments on commit bf85be9

Please sign in to comment.