Skip to content

Commit

Permalink
Platform-independent input newline handling
Browse files Browse the repository at this point in the history
Handle CR and CRLF newlines in input.  Output is still hard-coded to use
LF for now.

MOE_MIGRATED_REVID=136407062
  • Loading branch information
cushon committed Oct 17, 2016
1 parent 3caaae5 commit 4174d87
Show file tree
Hide file tree
Showing 10 changed files with 369 additions and 79 deletions.
24 changes: 8 additions & 16 deletions core/src/main/java/com/google/googlejavaformat/Doc.java
Expand Up @@ -14,9 +14,12 @@

package com.google.googlejavaformat;

import static com.google.common.collect.Iterables.getLast;

import com.google.common.base.MoreObjects;
import com.google.common.base.Optional;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.Iterators;
import com.google.common.collect.Range;
import com.google.googlejavaformat.Output.BreakTag;
import java.util.ArrayList;
Expand Down Expand Up @@ -274,7 +277,7 @@ private static void splitByBreaks(List<Doc> docs, List<List<Doc>> splits, List<B
breaks.add((Break) doc);
splits.add(new ArrayList<Doc>());
} else {
splits.get(splits.size() - 1).add(doc);
getLast(splits).add(doc);
}
}
}
Expand Down Expand Up @@ -714,16 +717,16 @@ public void add(DocBuilder builder) {

@Override
float computeWidth() {
int idx = Newlines.firstBreak(tok.getOriginalText());
// only count the first line of multi-line block comments
if (tok.isComment()) {
int idx = tok.getOriginalText().indexOf('\n');
if (idx > 0) {
return idx;
} else {
return tok.length();
}
}
return tok.getOriginalText().contains("\n") ? Float.POSITIVE_INFINITY : (float) tok.length();
return idx != -1 ? Float.POSITIVE_INFINITY : (float) tok.length();
}

@Override
Expand All @@ -740,19 +743,8 @@ Range<Integer> computeRange() {

@Override
public State computeBreaks(CommentsHelper commentsHelper, int maxWidth, State state) {
int column = state.column;
int lines = 0;
text = commentsHelper.rewrite(tok, maxWidth, column);
// TODO(lowasser): use lastIndexOf('\n')
for (char c : text.toCharArray()) {
if (c == '\n') {
column = 0;
lines++;
} else {
column++;
}
}
return state.withColumn(column);
text = commentsHelper.rewrite(tok, maxWidth, state.column);
return state.withColumn(text.length() - Iterators.getLast(Newlines.lineOffsetIterator(text)));
}

@Override
Expand Down
Expand Up @@ -14,7 +14,6 @@

package com.google.googlejavaformat;

import com.google.common.base.CharMatcher;
import com.google.common.collect.DiscreteDomain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Range;
Expand All @@ -28,7 +27,6 @@ public abstract class InputOutput {
private ImmutableList<String> lines = ImmutableList.of();

protected static final Range<Integer> EMPTY_RANGE = Range.closedOpen(-1, -1);
private static final CharMatcher NEWLINE_MATCHER = CharMatcher.is('\n');
private static final DiscreteDomain<Integer> INTEGERS = DiscreteDomain.integers();

/** Set the lines. */
Expand Down Expand Up @@ -77,7 +75,7 @@ protected final void computeRanges(List<? extends Input.Tok> toks) {
for (Input.Tok tok : toks) {
String txt = tok.getOriginalText();
int lineI0 = lineI;
lineI += NEWLINE_MATCHER.countIn(txt);
lineI += Newlines.count(txt);
int k = tok.getIndex();
if (k >= 0) {
addToRanges(range0s, lineI0, k);
Expand Down
167 changes: 167 additions & 0 deletions core/src/main/java/com/google/googlejavaformat/Newlines.java
@@ -0,0 +1,167 @@
/*
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/

package com.google.googlejavaformat;

import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import java.util.Iterator;
import java.util.NoSuchElementException;

/** Platform-independent newline handling. */
public class Newlines {

/** Returns the number of line breaks in the input. */
public static int count(String input) {
return Iterators.size(lineOffsetIterator(input)) - 1;
}

/** Returns the index of the first break in the input, or {@code -1}. */
public static int firstBreak(String input) {
Iterator<Integer> it = lineOffsetIterator(input);
it.next();
return it.hasNext() ? it.next() : -1;
}

private static final ImmutableSet<String> BREAKS = ImmutableSet.of("\r\n", "\n", "\r");

/** Returns true if the entire input string is a recognized line break. */
public static boolean isNewline(String input) {
return BREAKS.contains(input);
}

/**
* Returns the terminating line break in the input, or {@code null} if the input does not end in a
* break.
*/
public static String getLineEnding(String input) {
for (String b : BREAKS) {
if (input.endsWith(b)) {

This comment has been minimized.

Copy link
@sormuras

sormuras Oct 18, 2016

Contributor

This method fails to recognize the line ending of a text, that does not end with a line separator. Like core\src\test\resources\com\google\googlejavaformat\java\testdata\B18479811.input

This comment has been minimized.

Copy link
@cushon

cushon Oct 18, 2016

Author Collaborator

What would you expect it to do instead? In the context where it's used in this commit we need to distinguish between lines that don't end in breaks and ones that do.

This comment has been minimized.

Copy link
@sormuras

sormuras Oct 18, 2016

Contributor

Ah! input is a single line. Then it makes sense.
I'm implementing the output part and need a multi-line parser, that returns the line ending used. Will use input.contains(b) there.

return b;
}
}
return null;
}

/** Returns true if the input contains any line breaks. */
public static boolean containsBreaks(String text) {
return CharMatcher.anyOf("\n\r").matchesAnyOf(text);
}

/** Returns an iterator over the start offsets of lines in the input. */
public static Iterator<Integer> lineOffsetIterator(String input) {
return new LineOffsetIterator(input);
}

/** Returns an iterator over lines in the input, including trailing whitespace. */
public static Iterator<String> lineIterator(String input) {
return new LineIterator(input);
}

private static class LineOffsetIterator implements Iterator<Integer> {

private int curr = 0;
private int idx = 0;
private final String input;

private LineOffsetIterator(String input) {
this.input = input;
}

@Override
public boolean hasNext() {
return curr != -1;
}

@Override
public Integer next() {
if (curr == -1) {
throw new NoSuchElementException();
}
int result = curr;
advance();
return result;
}

private void advance() {
for (; idx < input.length(); idx++) {
char c = input.charAt(idx);
switch (c) {
case '\r':
if (idx + 1 < input.length() && input.charAt(idx + 1) == '\n') {
idx++;
}
// falls through
case '\n':
idx++;
curr = idx;
return;
default:
break;
}
}
curr = -1;
}

@Override
public void remove() {
throw new UnsupportedOperationException("remove");
}
}

private static class LineIterator implements Iterator<String> {

int idx;
String curr;

private final String input;
private final Iterator<Integer> indices;

private LineIterator(String input) {
this.input = input;
this.indices = lineOffsetIterator(input);
idx = indices.next(); // read leading 0
}

private void advance() {
int last = idx;
if (indices.hasNext()) {
idx = indices.next();
} else if (hasNext()) {
// no terminal line break
idx = input.length();
} else {
throw new NoSuchElementException();
}
curr = input.substring(last, idx);
}

@Override
public boolean hasNext() {
return idx < input.length();
}

@Override
public String next() {
advance();
return curr;
}

@Override
public void remove() {
throw new UnsupportedOperationException("remove");
}
}
}
Expand Up @@ -16,10 +16,10 @@

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.base.CharMatcher;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;
Expand All @@ -29,6 +29,7 @@
import com.google.googlejavaformat.Doc;
import com.google.googlejavaformat.DocBuilder;
import com.google.googlejavaformat.FormattingError;
import com.google.googlejavaformat.Newlines;
import com.google.googlejavaformat.Op;
import com.google.googlejavaformat.OpsBuilder;
import com.sun.tools.javac.file.JavacFileManager;
Expand Down Expand Up @@ -242,20 +243,13 @@ public ImmutableList<Replacement> getFormatReplacements(
return javaOutput.getFormatReplacements(tokenRangeSet);
}

static final CharMatcher NEWLINE = CharMatcher.is('\n');

/**
* Converts zero-indexed, [closed, open) line ranges in the given source file to character ranges.
*/
public static RangeSet<Integer> lineRangesToCharRanges(
String input, RangeSet<Integer> lineRanges) {
List<Integer> lines = new ArrayList<>();
lines.add(0);
int idx = NEWLINE.indexIn(input);
while (idx >= 0) {
lines.add(idx + 1);
idx = NEWLINE.indexIn(input, idx + 1);
}
Iterators.addAll(lines, Newlines.lineOffsetIterator(input));
lines.add(input.length() + 1);

final RangeSet<Integer> characterRanges = TreeRangeSet.create();
Expand Down
Expand Up @@ -13,6 +13,7 @@
*/
package com.google.googlejavaformat.java;

import static com.google.common.collect.Iterables.getLast;
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameclass;
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameenum;
import static org.eclipse.jdt.core.compiler.ITerminalSymbols.TokenNameinterface;
Expand Down Expand Up @@ -124,7 +125,7 @@ private String reorderImports() throws FormatterException {
if (toks.isEmpty()) {
tail = "";
} else {
Tok lastTok = toks.get(toks.size() - 1);
Tok lastTok = getLast(toks);
int tailStart = lastTok.getPosition() + lastTok.length();
tail = text.substring(tailStart);
}
Expand Down Expand Up @@ -336,8 +337,7 @@ private boolean isSpaceToken(int i) {
if (s.isEmpty()) {
return false;
} else {
// TODO(b/26984991): if the formatter starts understanding \r\n then \r should be removed here
return " \t\f\r".indexOf(s.codePointAt(0)) >= 0;
return " \t\f".indexOf(s.codePointAt(0)) >= 0;
}
}

Expand Down
Expand Up @@ -15,10 +15,10 @@
package com.google.googlejavaformat.java;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.googlejavaformat.CommentsHelper;
import com.google.googlejavaformat.Input.Tok;
import com.google.googlejavaformat.Newlines;
import com.google.googlejavaformat.java.javadoc.JavadocFormatter;
import java.util.ArrayList;
import java.util.Iterator;
Expand All @@ -27,8 +27,6 @@
/** {@code JavaCommentsHelper} extends {@link CommentsHelper} to rewrite Java comments. */
public final class JavaCommentsHelper implements CommentsHelper {

private static final Splitter NEWLINE_SPLITTER = Splitter.on('\n');

private final JavaFormatterOptions options;

public JavaCommentsHelper(JavaFormatterOptions options) {
Expand All @@ -45,8 +43,9 @@ public String rewrite(Tok tok, int maxWidth, int column0) {
text = JavadocFormatter.formatJavadoc(text, column0, options);
}
List<String> lines = new ArrayList<>();
for (String line : NEWLINE_SPLITTER.split(text)) {
lines.add(CharMatcher.whitespace().trimTrailingFrom(line));
Iterator<String> it = Newlines.lineIterator(text);
while (it.hasNext()) {
lines.add(CharMatcher.whitespace().trimTrailingFrom(it.next()));
}
if (tok.isSlashSlashComment()) {
return indentLineComments(lines, column0);
Expand Down

0 comments on commit 4174d87

Please sign in to comment.