Skip to content

Commit

Permalink
Merge branch 'master' into fix-ci-and-add-java-8
Browse files Browse the repository at this point in the history
  • Loading branch information
anjackson committed Oct 12, 2017
2 parents 09ee95f + ae8a0ef commit f7a5376
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Expand Up @@ -3,9 +3,9 @@ sudo: false
language: java

jdk:
- oraclejdk8
- openjdk7
- openjdk8
- oraclejdk8

before_install:
- "export JAVA_OPTS=-Xmx1500m"
Expand Down
2 changes: 1 addition & 1 deletion commons/pom.xml
Expand Up @@ -188,7 +188,7 @@
<dependency>
<groupId>org.netpreserve.commons</groupId>
<artifactId>webarchive-commons</artifactId>
<version>1.1.5</version>
<version>1.1.8</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
Expand Down
Expand Up @@ -22,7 +22,8 @@
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Logger;

import java.util.ArrayList;
import java.util.List;
import org.archive.crawler.framework.CrawlController;
import org.archive.crawler.framework.CrawlStatus;
import org.archive.modules.CrawlURI;
Expand Down Expand Up @@ -56,6 +57,17 @@ public WARCWriterProcessor getWarcWriter() {
return warcWriter;
}

// Instance initializer: runs for every new instance and stores an empty
// ArrayList under the "warcWriters" property via setWarcWriters(), so the
// property starts out as an empty list until a caller replaces it.
{
setWarcWriters(new ArrayList<WARCWriterProcessor>());
}
/**
 * Returns the list of WARC writer processors currently stored under the
 * {@code "warcWriters"} key of {@code kp}.
 *
 * <p>The stored value is cast to {@code List<WARCWriterProcessor>} without a
 * runtime element check, hence the suppressed unchecked warning.
 *
 * @return whatever {@code kp} holds for {@code "warcWriters"}, viewed as a
 *         list of {@link WARCWriterProcessor}
 */
@SuppressWarnings("unchecked")
public List<WARCWriterProcessor> getWarcWriters() {
    final Object configured = kp.get("warcWriters");
    return (List<WARCWriterProcessor>) configured;
}
/**
 * Stores the given list of WARC writer processors under the
 * {@code "warcWriters"} key of {@code kp}, replacing any previous value.
 *
 * @param warcWriters the list to store; it is stored as passed, with no
 *                    copy or validation performed here
 */
public void setWarcWriters(List<WARCWriterProcessor> warcWriters) {
kp.put("warcWriters", warcWriters);
}

protected CrawlController controller;
public CrawlController getCrawlController() {
return this.controller;
Expand All @@ -76,15 +88,27 @@ protected void innerProcess(CrawlURI uri) throws InterruptedException {
for (String k: limits.get(j).keySet()) {
Long limit = limits.get(j).get(k);

Map<String, AtomicLong> valueBucket = warcWriter.getStats().get(j);
if (valueBucket != null) {
AtomicLong value = valueBucket.get(k);
if (value != null
&& value.get() >= limit) {
log.info("stopping crawl because warcwriter stats['" + j + "']['" + k + "']=" + value.get() + " exceeds limit " + limit);
controller.requestCrawlStop(CrawlStatus.FINISHED_WRITE_LIMIT);
AtomicLong value = null;
if(getWarcWriters() !=null && getWarcWriters().size()>0) {
value = new AtomicLong(0);
for (WARCWriterProcessor w: getWarcWriters()) {
Map<String, AtomicLong> valueBucket = w.getStats().get(j);
if(valueBucket != null) {
value.set(value.addAndGet(valueBucket.get(k).get()));
}
}
}
else {
Map<String, AtomicLong> valueBucket = warcWriter.getStats().get(j);
if(valueBucket != null) {
value = valueBucket.get(k);
}
}
if (value != null
&& value.get() >= limit) {
log.info("stopping crawl because warcwriter stats['" + j + "']['" + k + "']=" + value.get() + " exceeds limit " + limit);
controller.requestCrawlStop(CrawlStatus.FINISHED_WRITE_LIMIT);
}
}
}
}
Expand Down
Expand Up @@ -185,8 +185,7 @@ public boolean accept(File pathname) {
assertEquals(4, wwp.getStats().get("totals").get("numRecords").get());
assertEquals(responseBytes.length, wwp.getStats().get("response").get("contentBytes").get());

// XXX fails currently, needs https://github.com/iipc/webarchive-commons/pull/51
// assertEquals(warc.length(), wwp.getStats().get("totals").get("sizeOnDisk").get());
assertEquals(warc.length(), wwp.getStats().get("totals").get("sizeOnDisk").get());
}

/**
Expand Down

0 comments on commit f7a5376

Please sign in to comment.