Skip to content

Commit

Permalink
updated configs and docs to reflect new config file
Browse files Browse the repository at this point in the history
  • Loading branch information
alexholmes committed Jan 25, 2012
1 parent 9961208 commit bf85be9
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 189 deletions.
270 changes: 96 additions & 174 deletions README.md

Large diffs are not rendered by default.

17 changes: 6 additions & 11 deletions TESTING.md
Expand Up @@ -14,23 +14,18 @@ $ sudo dd bs=1048576 count=1 skip=0 if=/dev/sda of=/tmp/slurper-test/in/random-f
$ md5sum /tmp/slurper-test/in/random-file
969249981fa294b1273b91ec4dc3d34b /tmp/slurper-test/in/random-file
</code></pre>
3. Run the HDFS Slurper in standalone mode.
<pre><code>export HADOOP_HOME=/usr/lib/hadoop
3. Edit `conf/slurper-env.sh` and set your JAVA_HOME and HADOOP_HOME settings.
4. Run the HDFS Slurper in standalone mode.
<pre><code>
bin/slurper.sh \
--datasource-name test \
--src-dir file:/tmp/slurper-test/in \
--dest-dir hdfs:/tmp/slurper-test/dest \
--dest-staging-dir hdfs:/tmp/slurper-test/staging \
--work-dir file:/tmp/slurper-test/work \
--complete-dir file:/tmp/slurper-test/complete \
--error-dir file:/tmp/slurper-test/error
--config-file /path/to/slurper/conf/examples/test.conf
</code></pre>
4. Verify that the file was copied into HDFS
5. Verify that the file was copied into HDFS
<pre><code>$ fs -ls /tmp/slurper-test/dest/random-file
Found 1 items
-rw-r--r-- 1 user group 1048576 2012-01-17 21:09 /tmp/slurper-test/dest/random-file
</code></pre>
5. Get the MD5 hash of the file in HDFS and verify it's the same as the original MD5 in step 2
6. Get the MD5 hash of the file in HDFS and verify it's the same as the original MD5 in step 2
<pre><code>$ fs -cat /tmp/slurper-test/dest/random-file | md5sum
969249981fa294b1273b91ec4dc3d34b -
</code></pre>
7 changes: 7 additions & 0 deletions src/main/config/examples/basic.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
DEST_DIR = hdfs:/incoming
7 changes: 7 additions & 0 deletions src/main/config/examples/dynamic-dest.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
SCRIPT = /tmp/hdfs-file-slurper/src/main/python/sample-python.py
10 changes: 10 additions & 0 deletions src/main/config/examples/lzop-verify.conf
@@ -0,0 +1,10 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper/in
WORK_DIR = file:/tmp/slurper/work
COMPLETE_DIR = file:/tmp/slurper/complete
ERROR_DIR = file:/tmp/slurper/error
DEST_STAGING_DIR = hdfs:/incoming/stage
DEST_DIR = hdfs:/incoming
COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec
CREATE_LZO_INDEX = true
VERIFY = true
7 changes: 7 additions & 0 deletions src/main/config/examples/test.conf
@@ -0,0 +1,7 @@
DATASOURCE_NAME = test
SRC_DIR = file:/tmp/slurper-test/in
WORK_DIR = file:/tmp/slurper-test/work
COMPLETE_DIR = file:/tmp/slurper-test/complete
ERROR_DIR = file:/tmp/slurper-test/error
DEST_STAGING_DIR = hdfs:/tmp/slurper-test/staging
DEST_DIR = hdfs:/tmp/slurper-test/dest
9 changes: 5 additions & 4 deletions src/main/config/example.conf → src/main/config/slurper.conf
@@ -1,14 +1,14 @@
#############################################################################
#
# A configuration file which can be used with the
# "__config_file" option.
# "--config-file" option.
#
#############################################################################

# A name used for the PID file, as well as the log filename, to support
# multiple Slurper instances working from the same installation directory.
#
DATASOURCE_NAME = alex
DATASOURCE_NAME = test

# The source directory. This must be a fully-qualified URI.
#
Expand Down Expand Up @@ -45,11 +45,12 @@ DEST_DIR = hdfs:/tmp/slurper/dest

# The compression codec which should be used to compress the output.
#
COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec
# COMPRESSION_CODEC = com.hadoop.compression.lzo.LzopCodec


# If the destination file is LZOP, this option will create an index file.
#
CREATE_LZO_INDEX = true
# CREATE_LZO_INDEX = true

# Reads the destination file after the copy has completed and verifies
# its integrity.
Expand Down

0 comments on commit bf85be9

Please sign in to comment.