Skip to content
Permalink
Browse files

[FIXED JENKINS-15382]

Revisiting the fix. Avoid parsing the entire text by only looking at the tail portion.

(cherry picked from commit 14d980c)
  • Loading branch information...
kohsuke authored and olivergondza committed Jul 2, 2013
1 parent f30ee33 commit a3512b9fe983cbbdb3b78597086822a61abb81ff
@@ -55,6 +55,9 @@
<!-- Record your changes in the trunk here. -->
<div id="trunk" style="display:none"><!--=TRUNK-BEGIN=-->
<ul class=image>
<li class=bug>
Improved memory efficiency in parsing test reports with large stdio output files.
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-15382">issue 15382</a>)
<li class=rfe>
Can't build using maven 3.1.0
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-15935">issue 15935</a>)
@@ -23,13 +23,17 @@
*/
package hudson.tasks.junit;

import hudson.util.TextFile;
import org.apache.commons.io.FileUtils;
import org.jvnet.localizer.Localizable;
import hudson.model.AbstractBuild;
import hudson.model.Run;
import hudson.tasks.test.TestResult;
import org.dom4j.Element;
import org.kohsuke.stapler.export.Exported;

import java.io.File;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.util.*;
@@ -138,14 +142,9 @@ static String possiblyTrimStdio(Collection<CaseResult> results, boolean keepLong
if (stdio == null) {
return null;
}
if (keepLongStdio) {
if (!isTrimming(results, keepLongStdio)) {
return stdio.toString();
}
for (CaseResult result : results) {
if (result.errorStackTrace != null) {
return stdio.toString();
}
}
int len = stdio.length();
int middle = len - HALF_MAX_SIZE * 2;
if (middle <= 0) {
@@ -154,6 +153,47 @@ static String possiblyTrimStdio(Collection<CaseResult> results, boolean keepLong
return stdio.subSequence(0, HALF_MAX_SIZE) + "\n...[truncated " + middle + " chars]...\n" + stdio.subSequence(len - HALF_MAX_SIZE, len);
}

/**
* Flavor of {@link #possiblyTrimStdio(Collection, boolean, CharSequence)} that doesn't try to read the whole thing into memory.
*/
static String possiblyTrimStdio(Collection<CaseResult> results, boolean keepLongStdio, File stdio) throws IOException {
if (!isTrimming(results, keepLongStdio) && stdio.length()<1024*1024) {
return FileUtils.readFileToString(stdio);
}

long len = stdio.length();
long middle = len - HALF_MAX_SIZE * 2;
if (middle <= 0) {
return FileUtils.readFileToString(stdio);
}

TextFile tx = new TextFile(stdio);
String head = tx.head(HALF_MAX_SIZE);
String tail = tx.fastTail(HALF_MAX_SIZE);

int headBytes = head.getBytes().length;
int tailBytes = tail.getBytes().length;

middle = len - (headBytes+tailBytes);
if (middle<=0) {
// if it turns out that we didn't have any middle section, just return the whole thing
return FileUtils.readFileToString(stdio);
}

return head + "\n...[truncated " + middle + " bytes]...\n" + tail;
}

private static boolean isTrimming(Collection<CaseResult> results, boolean keepLongStdio) {
if (keepLongStdio) return false;
for (CaseResult result : results) {
// if there's a failure, do not trim and keep the whole thing
if (result.errorStackTrace != null)
return false;
}
return true;
}


/**
* Used to create a fake failure, when Hudson fails to load data from XML files.
*/
@@ -209,14 +209,7 @@ private SuiteResult(File xmlReport, Element suite, boolean keepLongStdio) throws
File mavenOutputFile = new File(xmlReport.getParentFile(),m.group(1)+"-output.txt");
if (mavenOutputFile.exists()) {
try {
RandomAccessFile raf = new RandomAccessFile(mavenOutputFile, "r");
try {
ByteBuffer bb = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, mavenOutputFile.length());
CharBuffer cb = Charset.defaultCharset().decode(bb);
stdout = CaseResult.possiblyTrimStdio(cases, keepLongStdio, cb);
} finally {
raf.close();
}
stdout = CaseResult.possiblyTrimStdio(cases, keepLongStdio, mavenOutputFile);
} catch (IOException e) {
throw new IOException2("Failed to read "+mavenOutputFile,e);
}
@@ -98,7 +98,14 @@
* Creates an empty result.
*/
public TestResult() {
keepLongStdio = false;
this(false);
}

/**
* @since 1.522
*/
public TestResult(boolean keepLongStdio) {
this.keepLongStdio = keepLongStdio;
}

@Deprecated
@@ -23,13 +23,18 @@
*/
package hudson.util;

import javax.annotation.Nonnull;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.StringWriter;
import java.nio.charset.Charset;

/**
* Represents a text file.
@@ -84,6 +89,83 @@ public void write(String text) throws IOException {
}
}

/**
* Reads the first N characters or until we hit EOF.
*/
public @Nonnull String head(int numChars) throws IOException {
char[] buf = new char[numChars];
int read = 0;
Reader r = new FileReader(file);

try {
while (read<numChars) {
int d = r.read(buf,read,buf.length-read);
if (d<0)
break;
read += d;
}

return new String(buf,0,read);
} finally {
IOUtils.closeQuietly(r);
}
}

/**
* Efficiently reads the last N characters (or shorter, if the whole file is shorter than that.)
*
* <p>
* This method first tries to just read the tail section of the file to get the necessary chars.
* To handle multi-byte variable length encoding (such as UTF-8), we read a larger than
* necessary chunk.
*
* <p>
* Some multi-byte encoding, such as Shift-JIS (http://en.wikipedia.org/wiki/Shift_JIS) doesn't
* allow the first byte and the second byte of a single char to be unambiguously identified,
* so it is possible that we end up decoding incorrectly if we start reading in the middle of a multi-byte
* character. All the CJK multi-byte encodings that I know of are self-correcting; as they are ASCII-compatible,
* any ASCII characters or control characters will bring the decoding back in sync, so the worst
* case we just have some garbage in the beginning that needs to be discarded. To accommodate this,
* we read additional 1024 bytes.
*
* <p>
* Other encodings, such as UTF-8, are better in that the character boundary is unambiguous,
* so there can be at most one garbage char. For dealing with UTF-16 and UTF-32, we read at
* 4 bytes boundary (all the constants and multipliers are multiples of 4.)
*
* <p>
* Note that it is possible to construct a contrived input that fools this algorithm, and in this method
* we are willing to live with a small possibility of that to avoid reading the whole text. In practice,
* such an input is very unlikely.
*
* <p>
* So all in all, this algorithm should work decently, and it works quite efficiently on a large text.
*/
public @Nonnull String fastTail(int numChars, Charset cs) throws IOException {
RandomAccessFile raf = new RandomAccessFile(file,"r");

long len = raf.length();
// err on the safe side and assume each char occupies 4 bytes
// additional 1024 byte margin is to bring us back in sync in case we started reading from non-char boundary.
long pos = Math.max(0, len - (numChars*4+1024));
raf.seek(pos);

byte[] tail = new byte[(int) (len-pos)];
raf.readFully(tail);

String tails = new String(tail,cs);

return new String(tails.substring(Math.max(0,tails.length()-numChars))); // trim the baggage of substring by allocating a new String
}

/**
* Uses the platform default encoding.
*/
public @Nonnull String fastTail(int numChars) throws IOException {
return fastTail(numChars,Charset.defaultCharset());
}


public String readTrim() throws IOException {
return read().trim();
}
@@ -0,0 +1,102 @@
package hudson.util

import org.junit.After
import org.junit.Test

import java.nio.charset.Charset

/**
*
*
* @author Kohsuke Kawaguchi
*/
class TextFileTest {
List<File> files = [];

@After
void tearDown() {
files*.delete()
}

@Test
public void head() {
def f = newFile()
f.text = getClass().getResource("ascii.txt").text

def t = new TextFile(f)
def first35 = "Lorem ipsum dolor sit amet, consect"
assert t.head(35).equals(first35)
assert first35.length()==35
}

@Test
public void shortHead() {
def f = newFile()
f.text = "hello"

def t = new TextFile(f)
assert t.head(35).equals("hello")
}

@Test
public void tail() {
def f = newFile()
f.text = getClass().getResource("ascii.txt").text

def t = new TextFile(f)
def tail35 = "la, vitae interdum quam rutrum id.\n"
assert t.fastTail(35).equals(tail35)
assert tail35.length()==35
}

@Test
public void shortTail() {
def f = newFile()
f.text = "hello"

def t = new TextFile(f)
assert t.fastTail(35).equals("hello")
}

/**
* Shift JIS is a multi-byte character encoding.
*
* In it, 0x82 0x83 is \u30e2, and 0x83 0x82 is \uFF43.
* So if aren't careful, we'll parse the text incorrectly.
*/
@Test
public void tailShiftJIS() {
def f = newFile()

def t = new TextFile(f)

f.withOutputStream { o ->
(1..80).each {
(1..40).each {
o.write(0x83)
o.write(0x82)
}
o.write(0x0A);
}
}

def tail = t.fastTail(35, Charset.forName("Shift_JIS"))
assert tail.equals("\u30e2"*34+"\n")
assert tail.length()==35

// add one more byte to force fastTail to read from one byte ahead
// between this and the previous case, it should start parsing text incorrectly, until it hits NL
// where it comes back in sync
f.append([0x0A] as byte[])

tail = t.fastTail(35, Charset.forName("Shift_JIS"))
assert tail.equals("\u30e2"*33+"\n\n")
assert tail.length()==35
}

def newFile() {
def f = File.createTempFile("foo", "txt")
files.add(f)
return f
}
}
@@ -0,0 +1,54 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque porta
consectetur sapien vel sodales. Cras volutpat odio ipsum, ac euismod turpis
volutpat pulvinar. Mauris at lorem fringilla, aliquet erat vel, tempor odio.
Sed vel elit eget libero adipiscing pulvinar vitae at lorem. Mauris lacinia dui
quis fermentum fermentum. Pellentesque dapibus elementum porta. Sed aliquam
neque non orci aliquet, vitae accumsan tortor pretium. Sed vel nisi ultrices,
vestibulum tortor non, interdum lectus. Integer euismod quam eros, quis semper
lacus dignissim at. Etiam blandit volutpat augue quis fermentum. Suspendisse
vitae massa in odio luctus commodo eu eget ante. Nunc pretium sodales nisl in
ullamcorper. Nam interdum, leo ac malesuada convallis, ipsum leo tristique
purus, sit amet volutpat lorem nulla ut dui. Pellentesque eget nunc ut orci
elementum sagittis. Proin enim metus, consectetur et fringilla sed, scelerisque
eu risus. Nullam et augue placerat, hendrerit lacus at, bibendum sem.

Suspendisse venenatis nulla vitae arcu placerat lobortis. Vestibulum eleifend
luctus lacus, sit amet suscipit lorem ultrices vitae. Praesent eu porta elit.
Phasellus hendrerit mattis libero, in sollicitudin neque tristique a. In hac
habitasse platea dictumst. Maecenas eget bibendum orci, eu dapibus quam. Donec
gravida dapibus diam, vel aliquet sem hendrerit facilisis. Maecenas id nisi
lacus. Ut mollis interdum ligula, sed mattis enim cursus ac. Proin nec arcu a
neque dignissim iaculis ac non enim. Interdum et malesuada fames ac ante ipsum
primis in faucibus. Aliquam rutrum hendrerit lacus, ut facilisis nisl
pellentesque vel. Nunc ut ultricies turpis. Praesent vel orci iaculis, sagittis
nunc at, bibendum nulla. Interdum et malesuada fames ac ante ipsum primis in
faucibus. Sed scelerisque lectus vel nisi malesuada, eget pretium velit
porttitor.

Proin porta nibh ut urna placerat facilisis. Praesent at nisi malesuada,
lacinia eros a, fermentum orci. Maecenas at semper elit. Morbi sit amet tempus
tellus. Duis convallis sollicitudin odio vitae volutpat. Curabitur nec arcu
eget tellus elementum ultrices non at dolor. Vivamus convallis velit a neque
posuere sollicitudin. Donec quis est adipiscing tortor dignissim pellentesque
ut ut lacus. Nam sit amet porttitor purus, sit amet pulvinar quam. In luctus
porttitor scelerisque. Duis dapibus pharetra sem quis auctor. Cras pulvinar
faucibus volutpat. Donec vitae ligula fringilla, rhoncus leo egestas,
sollicitudin est. Class aptent taciti sociosqu ad litora torquent per conubia
nostra, per inceptos himenaeos.

Cras id luctus ipsum. Donec quis congue urna. Praesent dictum mattis sapien,
eget placerat ipsum rutrum et. Etiam gravida egestas odio, sit amet molestie
leo tempor vel. Aenean lacus dui, commodo sed velit in, consequat ultricies
velit. Nulla facilisi. Aenean sed vulputate odio. Interdum et malesuada fames
ac ante ipsum primis in faucibus. Nulla eleifend, metus nec sodales lobortis,
purus metus aliquam odio, vel lacinia nisi nulla eu erat. Donec tincidunt, leo
gravida hendrerit feugiat, nibh velit fermentum urna, non sodales urna velit
tincidunt orci. In bibendum justo eleifend molestie suscipit. Cras luctus urna
sit amet consectetur imperdiet. Aliquam pretium faucibus lacus, et pretium
tellus rhoncus fermentum.

Sed rhoncus varius accumsan. Nulla facilisi. In vehicula nec diam a bibendum.
Cum sociis natoque penatibus et magnis dis parturient montes, nascetur
ridiculus mus. Suspendisse ullamcorper faucibus risus in ultrices. Integer
dignissim ultrices eros, quis blandit purus ullamcorper in. Suspendisse laoreet
aliquet nulla, vitae interdum quam rutrum id.

0 comments on commit a3512b9

Please sign in to comment.
You can’t perform that action at this time.