diff --git a/src/plugin/indexer-csv/README.md b/src/plugin/indexer-csv/README.md index 1eadea196e..6eb9711465 100644 --- a/src/plugin/indexer-csv/README.md +++ b/src/plugin/indexer-csv/README.md @@ -1,7 +1,7 @@ indexer-csv plugin for Nutch ============================ -**indexer-csv plugin** is used for writing documents to a CSV file. It does not work in distributed mode, the output is written to the local filesystem, not to HDFS, see [NUTCH-1541](https://issues.apache.org/jira/browse/NUTCH-1541). The configuration for the index writers is on **conf/index-writers.xml** file, included in the official Nutch distribution and it's as follow: +**indexer-csv plugin** is used for writing documents to a CSV file. The configuration for the index writers is in the **conf/index-writers.xml** file, included in the official Nutch distribution, and is as follows: ```xml @@ -39,4 +39,4 @@ escapechar | Escape character used to escape a quote character | " maxfieldlength | Max. length of a single field value in characters | 4096 maxfieldvalues | Max. number of values of one field, useful for, e.g., the anchor texts field | 12 header | Write CSV column headers | true -outpath | Output path / directory (local filesystem path, relative to current working directory) | csvindexwriter \ No newline at end of file +outpath | Output path / directory (local filesystem path, relative to current working directory) | csvindexwriter diff --git a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java index 160d03dc11..fcdd31140c 100644 --- a/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java +++ b/src/plugin/indexer-csv/src/java/org/apache/nutch/indexwriter/csv/CSVIndexWriter.java @@ -44,17 +44,14 @@ * index as CSV or tab-separated plain text table. Format (encoding, separators, * etc.) 
is configurable by a couple of options, see output of * {@link #describe()}. - * - *

- * Note: works only in local mode, to be used with index option - * -noCommit. - *

+ * */ public class CSVIndexWriter implements IndexWriter { public static final Logger LOG = LoggerFactory .getLogger(CSVIndexWriter.class); + private String filename = "nutch.csv"; private Configuration config; /** ordered list of fields (columns) in the CSV file */ @@ -192,7 +189,7 @@ protected int find(String value, int start) { @Override public void open(Configuration conf, String name) throws IOException { - + filename = name; } /** @@ -227,7 +224,7 @@ public void open(IndexWriterParams parameters) throws IOException { LOG.info("Writing output to {}", outputPath); Path outputDir = new Path(outputPath); fs = outputDir.getFileSystem(config); - csvLocalOutFile = new Path(outputDir, "nutch.csv"); + csvLocalOutFile = new Path(outputDir, filename); if (!fs.exists(outputDir)) { fs.mkdirs(outputDir); }