Platform-independent input newline handling

Handle CR and CRLF newlines in input. Output is still hard-coded to use LF for now. MOE_MIGRATED_REVID=136407062
google · Oct 17, 2016 · 4174d87 · sormuras · Oct 18, 2016 · cushon
1 parent 3caaae5
commit 4174d87
Show file tree

Hide file tree

Showing 10 changed files with 369 additions and 79 deletions.
diff --git a/core/src/main/java/com/google/googlejavaformat/Doc.java b/core/src/main/java/com/google/googlejavaformat/Doc.java
@@ -14,9 +14,12 @@
 
 package com.google.googlejavaformat;
 
+import static com.google.common.collect.Iterables.getLast;
+
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Optional;
 import com.google.common.collect.DiscreteDomain;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Range;
 import com.google.googlejavaformat.Output.BreakTag;
 import java.util.ArrayList;
@@ -274,7 +277,7 @@ private static void splitByBreaks(List<Doc> docs, List<List<Doc>> splits, List<B
           breaks.add((Break) doc);
           splits.add(new ArrayList<Doc>());
         } else {
-          splits.get(splits.size() - 1).add(doc);
+          getLast(splits).add(doc);
         }
       }
     }
@@ -714,16 +717,16 @@ public void add(DocBuilder builder) {
 
     @Override
     float computeWidth() {
+      int idx = Newlines.firstBreak(tok.getOriginalText());
       // only count the first line of multi-line block comments
       if (tok.isComment()) {
-        int idx = tok.getOriginalText().indexOf('\n');
         if (idx > 0) {
           return idx;
         } else {
           return tok.length();
         }
       }
-      return tok.getOriginalText().contains("\n") ? Float.POSITIVE_INFINITY : (float) tok.length();
+      return idx != -1 ? Float.POSITIVE_INFINITY : (float) tok.length();
     }
 
     @Override
@@ -740,19 +743,8 @@ Range<Integer> computeRange() {
 
     @Override
     public State computeBreaks(CommentsHelper commentsHelper, int maxWidth, State state) {
-      int column = state.column;
-      int lines = 0;
-      text = commentsHelper.rewrite(tok, maxWidth, column);
-      // TODO(lowasser): use lastIndexOf('\n')
-      for (char c : text.toCharArray()) {
-        if (c == '\n') {
-          column = 0;
-          lines++;
-        } else {
-          column++;
-        }
-      }
-      return state.withColumn(column);
+      text = commentsHelper.rewrite(tok, maxWidth, state.column);
+      return state.withColumn(text.length() - Iterators.getLast(Newlines.lineOffsetIterator(text)));
     }
 
     @Override

diff --git a/core/src/main/java/com/google/googlejavaformat/InputOutput.java b/core/src/main/java/com/google/googlejavaformat/InputOutput.java
@@ -14,7 +14,6 @@
 
 package com.google.googlejavaformat;
 
-import com.google.common.base.CharMatcher;
 import com.google.common.collect.DiscreteDomain;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Range;
@@ -28,7 +27,6 @@ public abstract class InputOutput {
   private ImmutableList<String> lines = ImmutableList.of();
 
   protected static final Range<Integer> EMPTY_RANGE = Range.closedOpen(-1, -1);
-  private static final CharMatcher NEWLINE_MATCHER = CharMatcher.is('\n');
   private static final DiscreteDomain<Integer> INTEGERS = DiscreteDomain.integers();
 
   /** Set the lines. */
@@ -77,7 +75,7 @@ protected final void computeRanges(List<? extends Input.Tok> toks) {
     for (Input.Tok tok : toks) {
       String txt = tok.getOriginalText();
       int lineI0 = lineI;
-      lineI += NEWLINE_MATCHER.countIn(txt);
+      lineI += Newlines.count(txt);
       int k = tok.getIndex();
       if (k >= 0) {
         addToRanges(range0s, lineI0, k);

diff --git a/core/src/main/java/com/google/googlejavaformat/Newlines.java b/core/src/main/java/com/google/googlejavaformat/Newlines.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.googlejavaformat;
+
+import com.google.common.base.CharMatcher;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterators;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/** Platform-independent newline handling. */
+public class Newlines {
+
+  /** Returns the number of line breaks in the input. */
+  public static int count(String input) {
+    return Iterators.size(lineOffsetIterator(input)) - 1;
+  }
+
+  /** Returns the index of the first break in the input, or {@code -1}. */
+  public static int firstBreak(String input) {
+    Iterator<Integer> it = lineOffsetIterator(input);
+    it.next();
+    return it.hasNext() ? it.next() : -1;
+  }
+
+  private static final ImmutableSet<String> BREAKS = ImmutableSet.of("\r\n", "\n", "\r");
+
+  /** Returns true if the entire input string is a recognized line break. */
+  public static boolean isNewline(String input) {
+    return BREAKS.contains(input);
+  }
+
+  /**
+   * Returns the terminating line break in the input, or {@code null} if the input does not end in a
+   * break.
+   */
+  public static String getLineEnding(String input) {
+    for (String b : BREAKS) {
+      if (input.endsWith(b)) {
+        return b;
+      }
+    }
+    return null;
+  }
+
+  /** Returns true if the input contains any line breaks. */
+  public static boolean containsBreaks(String text) {
+    return CharMatcher.anyOf("\n\r").matchesAnyOf(text);
+  }
+
+  /** Returns an iterator over the start offsets of lines in the input. */
+  public static Iterator<Integer> lineOffsetIterator(String input) {
+    return new LineOffsetIterator(input);
+  }
+
+  /** Returns an iterator over lines in the input, including trailing whitespace. */
+  public static Iterator<String> lineIterator(String input) {
+    return new LineIterator(input);
+  }
+
+  private static class LineOffsetIterator implements Iterator<Integer> {
+
+    private int curr = 0;
+    private int idx = 0;
+    private final String input;
+
+    private LineOffsetIterator(String input) {
+      this.input = input;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return curr != -1;
+    }
+
+    @Override
+    public Integer next() {
+      if (curr == -1) {
+        throw new NoSuchElementException();
+      }
+      int result = curr;
+      advance();
+      return result;
+    }
+
+    private void advance() {
+      for (; idx < input.length(); idx++) {
+        char c = input.charAt(idx);
+        switch (c) {
+          case '\r':
+            if (idx + 1 < input.length() && input.charAt(idx + 1) == '\n') {
+              idx++;
+            }
+            // falls through
+          case '\n':
+            idx++;
+            curr = idx;
+            return;
+          default:
+            break;
+        }
+      }
+      curr = -1;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException("remove");
+    }
+  }
+
+  private static class LineIterator implements Iterator<String> {
+
+    int idx;
+    String curr;
+
+    private final String input;
+    private final Iterator<Integer> indices;
+
+    private LineIterator(String input) {
+      this.input = input;
+      this.indices = lineOffsetIterator(input);
+      idx = indices.next(); // read leading 0
+    }
+
+    private void advance() {
+      int last = idx;
+      if (indices.hasNext()) {
+        idx = indices.next();
+      } else if (hasNext()) {
+        // no terminal line break
+        idx = input.length();
+      } else {
+        throw new NoSuchElementException();
+      }
+      curr = input.substring(last, idx);
+    }
+
+    @Override
+    public boolean hasNext() {
+      return idx < input.length();
+    }
+
+    @Override
+    public String next() {
+      advance();
+      return curr;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException("remove");
+    }
+  }
+}
diff --git a/core/src/main/java/com/google/googlejavaformat/java/Formatter.java b/core/src/main/java/com/google/googlejavaformat/java/Formatter.java
@@ -16,10 +16,10 @@
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
-import com.google.common.base.CharMatcher;
 import com.google.common.base.Predicate;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Range;
 import com.google.common.collect.RangeSet;
 import com.google.common.collect.TreeRangeSet;
@@ -29,6 +29,7 @@
 import com.google.googlejavaformat.Doc;
 import com.google.googlejavaformat.DocBuilder;
 import com.google.googlejavaformat.FormattingError;
+import com.google.googlejavaformat.Newlines;
 import com.google.googlejavaformat.Op;
 import com.google.googlejavaformat.OpsBuilder;
 import com.sun.tools.javac.file.JavacFileManager;
@@ -242,20 +243,13 @@ public ImmutableList<Replacement> getFormatReplacements(
     return javaOutput.getFormatReplacements(tokenRangeSet);
   }
 
-  static final CharMatcher NEWLINE = CharMatcher.is('\n');
-
   /**
    * Converts zero-indexed, [closed, open) line ranges in the given source file to character ranges.
    */
   public static RangeSet<Integer> lineRangesToCharRanges(
       String input, RangeSet<Integer> lineRanges) {
     List<Integer> lines = new ArrayList<>();
-    lines.add(0);
-    int idx = NEWLINE.indexIn(input);
-    while (idx >= 0) {
-      lines.add(idx + 1);
-      idx = NEWLINE.indexIn(input, idx + 1);
-    }
+    Iterators.addAll(lines, Newlines.lineOffsetIterator(input));
     lines.add(input.length() + 1);
 
     final RangeSet<Integer> characterRanges = TreeRangeSet.create();

diff --git a/core/src/main/java/com/google/googlejavaformat/java/ImportOrderer.java b/core/src/main/java/com/google/googlejavaformat/java/ImportOrderer.java
@@ -13,6 +13,7 @@
  */
 package com.google.googlejavaformat.java;
 
+import static com.google.common.collect.Iterables.getLast;
 import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameclass;
 import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameenum;
 import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameinterface;
@@ -124,7 +125,7 @@ private String reorderImports() throws FormatterException {
     if (toks.isEmpty()) {
       tail = "";
     } else {
-      Tok lastTok = toks.get(toks.size() - 1);
+      Tok lastTok = getLast(toks);
       int tailStart = lastTok.getPosition() + lastTok.length();
       tail = text.substring(tailStart);
     }
@@ -336,8 +337,7 @@ private boolean isSpaceToken(int i) {
     if (s.isEmpty()) {
       return false;
     } else {
-      // TODO(b/26984991): if the formatter starts understanding \r\n then \r should be removed here
-      return " \t\f\r".indexOf(s.codePointAt(0)) >= 0;
+      return " \t\f".indexOf(s.codePointAt(0)) >= 0;
     }
   }
 

diff --git a/core/src/main/java/com/google/googlejavaformat/java/JavaCommentsHelper.java b/core/src/main/java/com/google/googlejavaformat/java/JavaCommentsHelper.java
@@ -15,10 +15,10 @@
 package com.google.googlejavaformat.java;
 
 import com.google.common.base.CharMatcher;
-import com.google.common.base.Splitter;
 import com.google.common.base.Strings;
 import com.google.googlejavaformat.CommentsHelper;
 import com.google.googlejavaformat.Input.Tok;
+import com.google.googlejavaformat.Newlines;
 import com.google.googlejavaformat.java.javadoc.JavadocFormatter;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -27,8 +27,6 @@
 /** {@code JavaCommentsHelper} extends {@link CommentsHelper} to rewrite Java comments. */
 public final class JavaCommentsHelper implements CommentsHelper {
 
-  private static final Splitter NEWLINE_SPLITTER = Splitter.on('\n');
-
   private final JavaFormatterOptions options;
 
   public JavaCommentsHelper(JavaFormatterOptions options) {
@@ -45,8 +43,9 @@ public String rewrite(Tok tok, int maxWidth, int column0) {
       text = JavadocFormatter.formatJavadoc(text, column0, options);
     }
     List<String> lines = new ArrayList<>();
-    for (String line : NEWLINE_SPLITTER.split(text)) {
-      lines.add(CharMatcher.whitespace().trimTrailingFrom(line));
+    Iterator<String> it = Newlines.lineIterator(text);
+    while (it.hasNext()) {
+      lines.add(CharMatcher.whitespace().trimTrailingFrom(it.next()));
     }
     if (tok.isSlashSlashComment()) {
       return indentLineComments(lines, column0);