Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

Commit

Permalink
Fix b/27579240: Feed large directory listings asynchronously to avoid…
Browse files Browse the repository at this point in the history
… timeouts.

This change returns at most maxHtmlSize links in the generated HTML
of a directory listing. If the directory has more than maxHtmlSize
entries, the directory stream is closed and the generated HTML is
returned as the response. A separate thread will then be launched
to push the directory entries via the DocIdPusher. This should avoid
getDocContent timeouts reading large directories.
  • Loading branch information
BrettMichaelJohnson committed Jan 12, 2017
1 parent c99079e commit d654ca5
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 87 deletions.
159 changes: 87 additions & 72 deletions src/com/google/enterprise/adaptor/fs/FsAdaptor.java
Expand Up @@ -18,13 +18,15 @@
import static java.util.Locale.ENGLISH;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.enterprise.adaptor.AbstractAdaptor;
import com.google.enterprise.adaptor.Acl;
import com.google.enterprise.adaptor.Acl.InheritanceType;
Expand All @@ -47,8 +49,6 @@
import com.google.enterprise.adaptor.UserPrincipal;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
Expand All @@ -66,6 +66,7 @@
import java.nio.file.Paths;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.text.MessageFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
Expand All @@ -86,6 +87,8 @@
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -364,6 +367,9 @@ private enum PreserveLastAccessTime { NEVER, IF_ALLOWED, ALWAYS };

private boolean resultLinksToShare;

/** ExecutorService for asychronous pushing of large directory content. */
private ExecutorService asyncDirectoryPusherService;

public FsAdaptor() {
// At the moment, we only support Windows.
if (System.getProperty("os.name").startsWith("Windows")) {
Expand Down Expand Up @@ -518,6 +524,10 @@ public void init(AdaptorContext context) throws Exception {
config.getValue(CONFIG_MONITOR_UPDATES));
log.log(Level.CONFIG, "monitorForUpdates: {0}", monitorForUpdates);

// Service for pushing large directory contents asynchronously.
asyncDirectoryPusherService = Executors.newFixedThreadPool(
Integer.parseInt(config.getValue("server.maxWorkerThreads")));

// How often to update file systems Status for Dashboard, in minutes.
long minutes =
Integer.parseInt(config.getValue(CONFIG_STATUS_UPDATE_INTERVAL_MINS));
Expand Down Expand Up @@ -563,6 +573,9 @@ public void run() {

@Override
public void destroy() {
if (asyncDirectoryPusherService != null) {
asyncDirectoryPusherService.shutdownNow();
}
statusUpdateService.cancel();
delegate.destroy();
}
Expand Down Expand Up @@ -1060,6 +1073,8 @@ private void getFileAcls(Path doc, Response resp) throws IOException {
/* Makes HTML document with web links to namespace's DFS links. */
private void getDfsNamespaceContent(Path doc, DocId docid, Response resp)
throws IOException {
// TODO (bmj): Handle large Namespaces by paying attention to maxHtmlLinks
// and sending excess to DocIdPusher.
resp.setNoIndex(!indexFolders);
try (HtmlResponseWriter writer = createHtmlResponseWriter(resp)) {
writer.start(docid, getFileName(doc));
Expand All @@ -1078,88 +1093,88 @@ private void getDfsNamespaceContent(Path doc, DocId docid, Response resp)
}
}

/* Makes HTML document with links this directory's files and folder. */
/* Makes HTML document with links to this directory's files and folders. */
private void getDirectoryContent(Path doc, DocId docid,
FileTime lastAccessTime, Response resp) throws IOException {
resp.setNoIndex(!indexFolders);
try (DirectoryStream<Path> files = delegate.newDirectoryStream(doc)) {
// Large directories can have tens or hundreds of thousands of files.
// The GSA truncates large HTML documents at 2.5MB, so return
// the first maxHtmlLinks worth as HTML content and the rest as external
// anchors. But we cannot start writing the HTML content to the response
// until after we add all the external anchor metadata. So spool the HTML
// for now and copy it to the response later.
SharedByteArrayOutputStream htmlOut = new SharedByteArrayOutputStream();
Writer writer = new OutputStreamWriter(htmlOut, CHARSET);
try (HtmlResponseWriter htmlWriter =
new HtmlResponseWriter(writer, docIdEncoder, Locale.ENGLISH)) {
htmlWriter.start(docid, getFileName(doc));
int htmlLinks = 0;
for (Path file : files) {
DocId docId;
try {
docId = delegate.newDocId(file);
} catch (IllegalArgumentException e) {
log.log(Level.WARNING, "Skipping {0} because {1}.",
new Object[] { file, e.getMessage() });
continue;
}
if (htmlLinks++ < maxHtmlLinks) {
// Add an HTML link.
htmlWriter.addLink(docId, getFileName(file));
} else {
// Add an external anchor.
resp.addAnchor(docIdEncoder.encodeDocId(docId),
getFileName(file));
}

// Large directories can have tens or hundreds of thousands of files.
// The GSA truncates large HTML documents at 2.5MB, so return the first
// maxHtmlLinks worth as HTML content and send the rest to the DocIdPusher.
try (DirectoryStream<Path> files = delegate.newDirectoryStream(doc);
HtmlResponseWriter htmlWriter = createHtmlResponseWriter(resp)) {
htmlWriter.start(docid, getFileName(doc));
int htmlLinks = 0;
for (Path file : files) {
DocId docId = delegate.newDocId(file);
if (htmlLinks++ < maxHtmlLinks) {
// Add an HTML link.
htmlWriter.addLink(docId, getFileName(file));
} else {
String message = MessageFormat.format(
"Directory listing for {0} exceeds maxHtmlSize of {1,number,#}. "
+ "Switching to asynchronous feed of directory content.",
doc, maxHtmlLinks);
htmlWriter.addHtml(
"<p>" + htmlWriter.escapeContent(message) + "</p>");
log.log(Level.FINE, message);
asyncDirectoryPusherService.submit(
new AsyncDirectoryContentPusher(doc, lastAccessTime));
break;
}
htmlWriter.finish();
}
// Finally, write out the generated HTML links as content.
resp.setContentType("text/html; charset=" + CHARSET.name());
copyStream(htmlOut.getInputStream(), resp.getOutputStream());
htmlWriter.finish();
} finally {
setLastAccessTime(doc, lastAccessTime);
}
}

/**
* This implementation adds a {@link #getInputStream} method that
* shares the buffer with this output stream. The input stream
* cannot be obtained until the output stream is closed.
*/
private static class SharedByteArrayOutputStream
extends ByteArrayOutputStream {
public SharedByteArrayOutputStream() {
super();
}

public SharedByteArrayOutputStream(int size) {
super(size);
}

/** Is this output stream open? */
private boolean isOpen = true;

/** Marks this output stream as closed. */
@Override
public void close() throws IOException {
isOpen = false;
super.close();
/* Feeds the directory's files and folders to the DocIdPusher. */
private class AsyncDirectoryContentPusher implements Runnable {
private final Path dir;
private final FileTime lastAccessTime;

public AsyncDirectoryContentPusher(Path dir, FileTime lastAccessTime) {
this.dir = dir;
this.lastAccessTime = lastAccessTime;
}

public void run() {
log.log(Level.FINE, "Pushing contents of directory {0}",
getFileName(dir));
try (DirectoryStream<Path> paths = delegate.newDirectoryStream(dir)) {
context.getDocIdPusher().pushDocIds(
Iterables.transform(paths,
new Function<Path, DocId>() {
@Override
public DocId apply(Path path) {
try {
DocId docId = delegate.newDocId(path);
log.log(Level.FINER, "Pushing docid {0}", docId);
return docId;
} catch (IOException e) {
throw new WrappedException(
"Failed to create DocId from path " + path, e);
}
}
}));
} catch (IOException | WrappedException | InterruptedException e) {
log.log(Level.WARNING, "Failed to push contents of directory " + dir,
e);
} finally {
try {
setLastAccessTime(dir, lastAccessTime);
} catch (IOException e) {
log.log(Level.WARNING, "Failed restore last access time for " + dir,
e);
}
}
}
}

/**
* Gets a <code>ByteArrayInputStream</code> that shares the
* output buffer, without copying it.
*
* @return a <code>ByteArrayInputStream</code>
* @throws IOException if the output stream is open
*/
public ByteArrayInputStream getInputStream() throws IOException {
if (isOpen) {
throw new IOException("Output stream is open.");
}
return new ByteArrayInputStream(buf, 0, count);
private static class WrappedException extends RuntimeException {
public WrappedException(String message, Exception cause) {
super(message, cause);
}
}

Expand Down
19 changes: 17 additions & 2 deletions src/com/google/enterprise/adaptor/fs/HtmlResponseWriter.java
Expand Up @@ -111,6 +111,21 @@ public void addLink(DocId doc, String label) throws IOException {
writer.write("</a></li>");
}

/**
* Writes the supplied HTML fragment to the writer.
*
* @param htmlText A fragement of HTML to write
*/
public void addHtml(String htmlText) throws IOException {
if (state != State.STARTED) {
throw new IllegalStateException("In unexpected state: " + state);
}
if (htmlText == null) {
throw new NullPointerException();
}
writer.write(htmlText);
}

/**
* Complete HTML body and flush.
*/
Expand Down Expand Up @@ -231,11 +246,11 @@ private String computeLabel(String label, DocId doc) {
return label;
}

private String escapeContent(String raw) {
public String escapeContent(String raw) {
return raw.replace("&", "&amp;").replace("<", "&lt;");
}

private String escapeAttributeValue(String raw) {
public String escapeAttributeValue(String raw) {
return escapeContent(raw).replace("\"", "&quot;").replace("'", "&apos;");
}
}
43 changes: 30 additions & 13 deletions test/com/google/enterprise/adaptor/fs/FsAdaptorTest.java
Expand Up @@ -22,6 +22,7 @@
import static java.nio.file.attribute.AclEntryType.*;
import static org.junit.Assert.*;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.enterprise.adaptor.Acl;
Expand Down Expand Up @@ -1176,12 +1177,12 @@ public void testGetDocContentDirectoryHtmlLinksOnly() throws Exception {
}

@Test
public void testGetDocContentDirectoryExternalAnchorsOnly() throws Exception {
public void testGetDocContentDirectoryPushedDocIdsOnly() throws Exception {
testMaxHtmlLinks(0);
}

@Test
public void testGetDocContentDirectoryHtmlLinksAndAnchors() throws Exception {
public void testGetDocContentDirectoryHtmlLinksAndDocIds() throws Exception {
testMaxHtmlLinks(2);
}

Expand Down Expand Up @@ -1218,23 +1219,39 @@ private void testGetDocContentDirectory(Path path, String label,
+ "Folder " + label + "</title></head><body><h1>Folder " + label
+ "</h1>"));

// Verify the links and anchors.
// Verify the HTML links.
int i;
for (i = 0; i < files.length && i < maxHtmlLinks; i++) {
for (i = 0; i < files.length; i++) {
String file = files[i];
String expectedLink = "<li><a href=\"" + file
+ (file.contains("dir") ? "/" : "") + "\">" + file + "</a></li>";
assertTrue(html, html.contains(expectedLink));
if (i < maxHtmlLinks) {
assertTrue(html, html.contains(expectedLink));
} else {
assertFalse(html, html.contains(expectedLink));
}
}
for (; i < files.length; i++) {
String file = files[i];
URI uri = context.getDocIdEncoder().encodeDocId(new DocId(
(file.contains("dir") ? file + "/" : file)));
URI anchor = response.anchors.get(file);
assertNotNull("File " + file + " with URI " + uri + " is missing"
+ " from response:/n" + html + "/n" + response.anchors, anchor);
assertEquals(uri, anchor);

// If the number of children exceeds the maxHtmlLinks, they should
// all have been pushed to the DocIdPusher asynchronously.
Thread.sleep(100); // Wait for AsyncDirectoryContentPusher thread to finish.
List<Record> expected;
if (files.length > maxHtmlLinks) {
assertTrue(html, html.contains("<p>Directory listing for "));
assertTrue(html, html.contains(" exceeds maxHtmlSize of "));
ImmutableList.Builder<Record> builder = ImmutableList.builder();
for (String file : files) {
builder.add(new Record.Builder(getDocId(file)).build());
}
expected = builder.build();
} else {
// Nothing should have been pushed.
expected = ImmutableList.of();
}
assertEquals(expected, pusher.getRecords());

// There should be no external anchors.
assertEquals(ImmutableMap.<String, URI>of(), response.anchors);
}

@Test
Expand Down

0 comments on commit d654ca5

Please sign in to comment.