Skip to content

Commit

Permalink
mimetype detection: make “missing” mimetype value configurable - Java…
Browse files Browse the repository at this point in the history
… Indexer generates “application/http” instead of “unk”.
  • Loading branch information
kngenie committed Oct 2, 2014
1 parent c5ce1b3 commit 7d9d332
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.archive.wayback.memento.MementoConstants;
import org.archive.wayback.memento.MementoUtils;
import org.archive.wayback.replay.mimetype.MimeTypeDetector;
import org.archive.wayback.resourcestore.indexer.IndexWorker;
import org.archive.wayback.webapp.AccessPoint;

/**
Expand All @@ -49,6 +50,9 @@ public class SelectorReplayDispatcher implements ReplayDispatcher {
private List<MimeTypeDetector> mimeTypeDetectors = null;
private ClosestResultSelector closestSelector = null;

/**
 * Default for {@link #missingMimeType}: {@code "unk"}.
 */
public static final String DEFAULT_MISSING_MIMETYPE = "unk";
/**
 * Value of the {@code mimetype} field that is treated as
 * "{@code Content-Type} unavailable". Starts out as
 * {@link #DEFAULT_MISSING_MIMETYPE}.
 */
private String missingMimeType = DEFAULT_MISSING_MIMETYPE;

/**
* default value for {@link #untrustfulMimeTypes}
*/
Expand All @@ -63,8 +67,8 @@ public class SelectorReplayDispatcher implements ReplayDispatcher {
* For captures whose {@code mimetype} prefix-matches any of these,
* SelectorReplayDispatcher will attempt to detect actual mime-type
* with {@code mimeTypeDetector} (if configured).
* <p>{@code unk} is always considered <i>untrustful</i>. You don't
* need to include it in this list.</p>
* <p>Value set to {@link #missingMimeType} is always considered
* <i>untrustful</i>. You don't need to include it in this list.</p>
* <p>If passed {@code null}, default {@link #DEFAULT_UNTRUSTFUL_MIMETYPES}
* will be used. If set to an empty array, detection is applied only to
* captures without {@code Content-Type} header.</p>
Expand All @@ -78,6 +82,24 @@ public void setUntrustfulMimeTypes(List<String> untrustfulMimeTypes) {
.toArray(new String[untrustfulMimeTypes.size()]);
}

/**
 * Sets the {@code mimetype} field value that indicates
 * {@code Content-Type} was unavailable in the response.
 * <p>The default is {@code unk} (compatible with CDX-Writer).
 * {@link IndexWorker} apparently puts {@code application/http}
 * instead.</p>
 * @param missingMimeType marker value to use; {@code null} or an
 *        empty string selects {@link #DEFAULT_MISSING_MIMETYPE}
 */
public void setMissingMimeType(String missingMimeType) {
    final boolean unspecified = missingMimeType == null || missingMimeType.isEmpty();
    this.missingMimeType = unspecified ? DEFAULT_MISSING_MIMETYPE : missingMimeType;
}

/**
 * Returns the {@code mimetype} value currently configured as the
 * missing-mimetype marker.
 * @return current value of the {@code missingMimeType} field
 */
public String getMissingMimeType() {
    return this.missingMimeType;
}

/**
* check if mime-type detection is suggested for mimeType.
* @param mimeType mime-type to test (must not be null/empty/"unk")
Expand Down Expand Up @@ -110,7 +132,7 @@ public ReplayRenderer getRenderer(WaybackRequest wbRequest,
// HTTP response has valid Content-Type header. CDX writer does not fix
// it (although it's capable of fixing it internally). If CaptureSearchResult
// says mimeType is missing (equals the configured missingMimeType, "unk" by default), try reading Content-Type header from the resource.
if (mimeType == null || mimeType.isEmpty() || "unk".equals(mimeType)) {
if (mimeType == null || mimeType.isEmpty() || missingMimeType.equals(mimeType)) {
mimeType = resource.getHeader("Content-Type");
}
// the missing-mimetype marker ("unk" by default) and "" are changed to Content-Type header value (or null if in fact missing)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,33 @@ public void testMimeTypeDetector_ignoredIfForced() throws Exception {
EasyMock.verify(detector);
}

/**
 * Exercises a non-default {@code missingMimeType}: the capture's
 * mimetype equals the configured marker, the mocked detector is
 * expected to be asked to sniff the resource, and the renderer
 * named {@code "js"} must be selected.
 * @throws Exception
 */
public void testMissingMimeType() throws Exception {
    final String customMissing = "application/http";
    cut.setMissingMimeType(customMissing);

    // NOTE(review): the null first argument presumably means "no
    // Content-Type header" -- confirm against createTestResource.
    byte[] payload = "var k = 1;".getBytes("UTF-8");
    Resource resource = createTestResource(null, payload);

    // capture whose index mimetype is exactly the configured marker
    CaptureSearchResult capture = new CaptureSearchResult();
    capture.setMimeType(customMissing);

    // record the expected sniff() call, then switch the mock to replay
    MimeTypeDetector sniffer = EasyMock.createMock(MimeTypeDetector.class);
    EasyMock.expect(sniffer.sniff(resource)).andReturn("text/javascript");
    cut.setMimeTypeDetectors(Collections.singletonList(sniffer));
    EasyMock.replay(sniffer);

    ReplayRenderer renderer = cut.getRenderer(new WaybackRequest(), capture, resource);
    TestReplayRenderer selected = (TestReplayRenderer) renderer;

    assertEquals("js", selected.name);
    EasyMock.verify(sniffer);
}

// TODO: want another test for REVISIT case?

}

0 comments on commit 7d9d332

Please sign in to comment.