From 53992542040bde33a8f3f93560adaac007b353c6 Mon Sep 17 00:00:00 2001 From: "yuming.wang" Date: Wed, 9 Sep 2015 18:05:45 +0800 Subject: [PATCH] support more delimiters when using CsvBulkImport Add a function to support more delimiters when using CsvBulkImport, such as \001 and \t. --- .../phoenix/mapreduce/CsvBulkLoadTool.java | 92 ++++++++++++++++--- 1 file changed, 80 insertions(+), 12 deletions(-) diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java index bb4054bc605..8dcef73eeff 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/CsvBulkLoadTool.java @@ -341,28 +341,19 @@ private static void configureOptions(CommandLine cmdLine, List impor char delimiterChar = ','; if (cmdLine.hasOption(DELIMITER_OPT.getOpt())) { String delimString = cmdLine.getOptionValue(DELIMITER_OPT.getOpt()); - if (delimString.length() != 1) { - throw new IllegalArgumentException("Illegal delimiter character: " + delimString); - } - delimiterChar = delimString.charAt(0); + delimiterChar = toChar(delimString); } char quoteChar = '"'; if (cmdLine.hasOption(QUOTE_OPT.getOpt())) { String quoteString = cmdLine.getOptionValue(QUOTE_OPT.getOpt()); - if (quoteString.length() != 1) { - throw new IllegalArgumentException("Illegal quote character: " + quoteString); - } - quoteChar = quoteString.charAt(0); + quoteChar = toChar(quoteString); } char escapeChar = '\\'; if (cmdLine.hasOption(ESCAPE_OPT.getOpt())) { String escapeString = cmdLine.getOptionValue(ESCAPE_OPT.getOpt()); - if (escapeString.length() != 1) { - throw new IllegalArgumentException("Illegal escape character: " + escapeString); - } - escapeChar = escapeString.charAt(0); + escapeChar = toChar(escapeString); } CsvBulkImportUtil.initCsvImportJob( @@ -378,6 +369,83 @@ private static void configureOptions(CommandLine cmdLine, List 
impor cmdLine.hasOption(IGNORE_ERRORS_OPT.getOpt())); }
+    /**
+     * Converts a command-line character specification into the character it denotes.
+     *
+     * <p>Accepted forms:
+     * <ul>
+     *   <li>a literal character, returned as-is: {@code "x"} gives {@code 'x'};</li>
+     *   <li>a lone backslash, returned literally;</li>
+     *   <li>one of the two-character escapes {@code \b}, {@code \n}, {@code \r},
+     *       {@code \t}, {@code \"}, {@code \'} or {@code \\};</li>
+     *   <li>{@code \0}, shorthand for the NUL character;</li>
+     *   <li>{@code \0ooo}, the character whose code is the octal number {@code ooo}
+     *       (for example {@code \001});</li>
+     *   <li>{@code \0xhhh} or {@code \0Xhhh}, the character whose code is the
+     *       hexadecimal number {@code hhh}.</li>
+     * </ul>
+     *
+     * <p>The input is not trimmed, so a single space is a valid literal character.
+     * A string that does not start with a backslash and is longer than one character
+     * is accepted with a logged warning, and only its first character is used.
+     *
+     * @param charish the character specification to decode
+     * @return the character described by {@code charish}
+     * @throws IllegalArgumentException if {@code charish} is null or empty, is an
+     *         unsupported escape sequence, or carries an octal/hex code that is
+     *         malformed or does not fit in a {@code char}
+     */
+    public static char toChar(String charish) throws IllegalArgumentException {
+        if (null == charish || charish.length() == 0) {
+            throw new IllegalArgumentException("Illegal escape character: " + charish);
+        }
+
+        if (charish.startsWith("\\0x") || charish.startsWith("\\0X")) {
+            // A bare "\0x" leaves no digits; parseCharCode rejects the empty string.
+            return parseCharCode(charish, 3, 16);
+        }
+
+        if (charish.startsWith("\\0")) {
+            // A bare "\0" is shorthand for NUL; anything longer is an octal code.
+            return charish.length() == 2 ? '\000' : parseCharCode(charish, 2, 8);
+        }
+
+        if (charish.startsWith("\\")) {
+            if (charish.length() == 1) {
+                // It is just a backslash; keep it literal.
+                return '\\';
+            }
+            if (charish.length() > 2) {
+                // No supported escape name is longer than one character.
+                throw new IllegalArgumentException("Illegal escape character: " + charish);
+            }
+            // A normal one-character escape sequence.
+            switch (charish.charAt(1)) {
+                case 'b':
+                    return '\b';
+                case 'n':
+                    return '\n';
+                case 'r':
+                    return '\r';
+                case 't':
+                    return '\t';
+                case '\"':
+                    return '\"';
+                case '\'':
+                    return '\'';
+                case '\\':
+                    return '\\';
+                default:
+                    throw new IllegalArgumentException("Illegal escape character: " + charish);
+            }
+        }
+
+        // A normal literal character; extra characters are tolerated but ignored.
+        if (charish.length() > 1) {
+            LOG.warn("Character argument " + charish + " has multiple characters; "
+                + "only the first will be used.");
+        }
+        return charish.charAt(0);
+    }
+
+    /**
+     * Parses the digits of {@code charish} that follow its first {@code prefixLength}
+     * characters as a character code in the given radix.
+     *
+     * @throws IllegalArgumentException if the digits are malformed or the code lies
+     *         outside the range of a {@code char}
+     */
+    private static char parseCharCode(String charish, int prefixLength, int radix) {
+        final int code;
+        try {
+            code = Integer.parseInt(charish.substring(prefixLength), radix);
+        } catch (NumberFormatException e) {
+            // Report bad digits with the same message as every other failure path.
+            throw new IllegalArgumentException("Illegal escape character: " + charish, e);
+        }
+        // A bare (char) cast would silently wrap negative or too-large codes.
+        if (code < Character.MIN_VALUE || code > Character.MAX_VALUE) {
+            throw new IllegalArgumentException("Illegal escape character: " + charish);
+        }
+        return (char) code;
+    }
+
 /** * Perform any required validation on the table being bulk loaded into: * - ensure no column family names start with '_', as they'd be ignored leading to problems.