From 09f8d64bbb6d0374ed7858d41353bcc41235e0e5 Mon Sep 17 00:00:00 2001 From: lbschanno <52420215+lbschanno@users.noreply.github.com> Date: Tue, 9 Jul 2019 12:55:38 -0400 Subject: [PATCH] Refactor out DefaultFormatter (#35) (#56) * Refactor out usage of DefaultFormatter. It is an internal-only unstable API that is not recommended for users, and as such is unsuitable to include in examples. --- contrib/import-control.xml | 1 - .../examples/filedata/ChunkInputFormat.java | 17 ++-- .../examples/mapreduce/TableToFile.java | 4 +- .../accumulo/examples/util/FormatUtil.java | 97 +++++++++++++++++++ 4 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 src/main/java/org/apache/accumulo/examples/util/FormatUtil.java diff --git a/contrib/import-control.xml b/contrib/import-control.xml index 0c0f647..f4628ff 100644 --- a/contrib/import-control.xml +++ b/contrib/import-control.xml @@ -36,7 +36,6 @@ - diff --git a/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java b/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java index 50c385b..49f592d 100644 --- a/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java +++ b/src/main/java/org/apache/accumulo/examples/filedata/ChunkInputFormat.java @@ -25,7 +25,7 @@ import org.apache.accumulo.core.client.mapreduce.InputFormatBase; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.util.format.DefaultFormatter; +import org.apache.accumulo.examples.util.FormatUtil; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; @@ -40,7 +40,7 @@ public class ChunkInputFormat extends InputFormatBase>,InputStream> { @Override public RecordReader>,InputStream> createRecordReader(InputSplit split, - TaskAttemptContext context) throws IOException, InterruptedException { + TaskAttemptContext context) { return new 
RecordReaderBase>,InputStream>() { private PeekingIterator> peekingScannerIterator; @@ -53,7 +53,9 @@ public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IO } @Override - public boolean nextKeyValue() throws IOException, InterruptedException { + public boolean nextKeyValue() throws IOException { + log.debug("nextKeyValue called"); + currentK.clear(); if (peekingScannerIterator.hasNext()) { ++numKeysRead; @@ -61,14 +63,17 @@ public boolean nextKeyValue() throws IOException, InterruptedException { while (!entry.getKey().getColumnFamily().equals(FileDataIngest.CHUNK_CF)) { currentK.add(entry); peekingScannerIterator.next(); - if (!peekingScannerIterator.hasNext()) + if (!peekingScannerIterator.hasNext()) { return true; + } entry = peekingScannerIterator.peek(); } currentKey = entry.getKey(); ((ChunkInputStream) currentV).setSource(peekingScannerIterator); - if (log.isTraceEnabled()) - log.trace("Processing key/value pair: " + DefaultFormatter.formatEntry(entry, true)); + if (log.isTraceEnabled()) { + log.trace("Processing key/value pair: " + FormatUtil.formatTableEntry(entry, true)); + } + return true; } return false; diff --git a/src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java b/src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java index 9621821..456546a 100644 --- a/src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java +++ b/src/main/java/org/apache/accumulo/examples/mapreduce/TableToFile.java @@ -25,8 +25,8 @@ import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.util.format.DefaultFormatter; import org.apache.accumulo.examples.cli.ClientOpts; +import org.apache.accumulo.examples.util.FormatUtil; import org.apache.accumulo.hadoop.mapreduce.AccumuloInputFormat; import org.apache.accumulo.hadoop.mapreduce.InputFormatBuilder; import org.apache.hadoop.fs.Path; @@ -59,7 
+59,7 @@ public static class TTFMapper extends Mapper { @Override public void map(Key row, Value data, Context context) throws IOException, InterruptedException { Map.Entry entry = new SimpleImmutableEntry<>(row, data); - context.write(NullWritable.get(), new Text(DefaultFormatter.formatEntry(entry, false))); + context.write(NullWritable.get(), new Text(FormatUtil.formatTableEntry(entry, false))); context.setStatus("Outputed Value"); } } diff --git a/src/main/java/org/apache/accumulo/examples/util/FormatUtil.java b/src/main/java/org/apache/accumulo/examples/util/FormatUtil.java new file mode 100644 index 0000000..72a64b5 --- /dev/null +++ b/src/main/java/org/apache/accumulo/examples/util/FormatUtil.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.examples.util; + +import java.util.Map; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.io.Text; + +public final class FormatUtil { + + /** + * Format and return the specified table entry as a human-readable String suitable for logging. + *
+ * If {@code includeTimestamp} is true, the entry will be formatted as:
+ * {@literal <row> <columnFamily>:<columnQualifier> <columnVisibility> <timestamp>\t<value>}
+ * If false, the entry will be formatted as:
+ * {@literal <row> <columnFamily>:<columnQualifier> <columnVisibility>\t<value>}
+ * Examples:
+ * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] 9223372036854775807 asdfjkl;} + * {@literal a ~chunk:\x00\x00\x00d\x00\x00\x00\x00 [A&B] asdfjkl;} + * + * @param entry + * the table entry to format + * @param includeTimestamp + * if true, include the timestamp in the returned result + * @return the specified entry as a formatted String, or null if the entry is null + */ + public static String formatTableEntry(final Map.Entry entry, + final boolean includeTimestamp) { + if (entry == null) { + return null; + } + + Key key = entry.getKey(); + StringBuilder sb = new StringBuilder(); + Text buffer = new Text(); + + // Append row. + appendBytes(sb, key.getRow(buffer).getBytes()).append(" "); + + // Append column family. + appendBytes(sb, key.getColumnFamily().getBytes()).append(":"); + + // Append column qualifier. + appendBytes(sb, key.getColumnQualifier().getBytes()).append(" "); + + // Append visibility and timestamp. + sb.append(new ColumnVisibility(key.getColumnVisibility(buffer))); + + if (includeTimestamp) { + sb.append(" ").append(entry.getKey().getTimestamp()); + } + + // Append value. + Value value = entry.getValue(); + if (value != null && value.getSize() > 0) { + sb.append("\t"); + appendBytes(sb, value.get()); + } + return sb.toString(); + } + + private static StringBuilder appendBytes(final StringBuilder sb, final byte[] ba) { + for (byte b : ba) { + int c = 0xff & b; + if (c == '\\') { + sb.append("\\\\"); + } else if (c >= 32 && c <= 126) { + sb.append((char) c); + } else { + sb.append("\\x").append(String.format("%02X", c)); + } + } + return sb; + } + + private FormatUtil() { + throw new UnsupportedOperationException(); + } +}