Skip to content

Commit

Permalink
Merge branch 'ari-5630' into ait-qa
Browse files Browse the repository at this point in the history
* ari-5630:
  catch exceptions scoping outlinks to stop them from derailing processing of the parent url
  fix for test failures in a workspace on NFS-mounted filesystem
  max size for extracted form elements
  • Loading branch information
nlevitt committed Jan 17, 2018
2 parents 9169eb8 + 9575914 commit 74c4865
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
Expand Up @@ -19,11 +19,13 @@

package org.archive.crawler.prefetch;

import static org.archive.modules.fetcher.FetchStatusCodes.S_OUT_OF_SCOPE;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.crawler.framework.Scoper;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessResult;
import org.archive.modules.fetcher.FetchStatusCodes;

/**
* Simple single-URI scoper, considers passed-in URI as candidate; sets
Expand All @@ -35,11 +37,19 @@ public class CandidateScoper extends Scoper {
@SuppressWarnings("unused")
private static final long serialVersionUID = 1L;

private static final Logger logger = Logger.getLogger(CandidateScoper.class.getName());

@Override
protected ProcessResult innerProcessResult(CrawlURI curi) throws InterruptedException {
if (!isInScope(curi)) {
// Scope rejected
curi.setFetchStatus(S_OUT_OF_SCOPE);
try {
if (!isInScope(curi)) {
// Scope rejected
curi.setFetchStatus(FetchStatusCodes.S_OUT_OF_SCOPE);
return ProcessResult.FINISH;
}
} catch (Exception e) {
curi.setFetchStatus(FetchStatusCodes.S_RUNTIME_EXCEPTION);
logger.log(Level.SEVERE, "problem scoping " + curi, e);
return ProcessResult.FINISH;
}
return ProcessResult.PROCEED;
Expand Down
Expand Up @@ -128,6 +128,14 @@ protected BdbModule bdb() throws IOException {
return bdb;
}

/**
 * Closes the {@code bdb} field if it has been set, then delegates to the
 * superclass teardown.
 *
 * <p>The superclass teardown runs in a {@code finally} block so that it is
 * not skipped when closing {@code bdb} throws. If both steps throw, the
 * exception from the superclass teardown is the one that propagates.
 *
 * @throws Exception if closing {@code bdb} or the superclass teardown fails
 */
@Override
protected void tearDown() throws Exception {
    try {
        // Nothing to close when bdb was never assigned.
        if (bdb != null) {
            bdb.close();
        }
    } finally {
        // Always give the superclass a chance to clean up its own state.
        super.tearDown();
    }
}

public void testBasics() throws InterruptedException, IOException {
historyStore().store.clear();
assertTrue(historyStore().store.isEmpty());
Expand Down

0 comments on commit 74c4865

Please sign in to comment.