Skip to content

Commit

Permalink
Cdx Gen: Add -new-canon-classic and -new-canon-surt to the canonicalizer, also use 11 field format with new canon
Browse files Browse the repository at this point in the history
  • Loading branch information
ikreymer committed Mar 28, 2013
1 parent de92848 commit b8315ed
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@

public class CDXFormatIndex extends CDXIndex {
public final static String CDX_HEADER_MAGIC = " CDX N b a m s k r M V g";

// New 11-field format (adds the S field between M and V), part of the new canon
public final static String CDX_HEADER_MAGIC_NEW = " CDX N b a m s k r M S V g";

private CDXFormat format = null;
private long lastMod = -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.archive.wayback.resourcestore.locationdb.ResourceFileLocationDB;
import org.archive.wayback.util.url.AggressiveUrlCanonicalizer;
import org.archive.wayback.util.url.IdentityUrlCanonicalizer;
import org.archive.wayback.util.url.KeyMakerUrlCanonicalizer;

/**
* Simple worker, which gets tasks from an IndexQueue, in this case, the name
Expand Down Expand Up @@ -167,6 +168,12 @@ public static void main(String[] args) {
if(args[idx].equals("-identity")) {
canonicalizer = new IdentityUrlCanonicalizer();
isIdentity = true;
} else if(args[idx].equals("-new-canon-classic")) {
canonicalizer = new KeyMakerUrlCanonicalizer(false);
cdxSpec = CDXFormatIndex.CDX_HEADER_MAGIC_NEW;
} else if(args[idx].equals("-new-canon-surt")) {
canonicalizer = new KeyMakerUrlCanonicalizer(true);
cdxSpec = CDXFormatIndex.CDX_HEADER_MAGIC_NEW;
} else if(args[idx].equals("-format")) {
idx++;
if(idx >= args.length) {
Expand Down

0 comments on commit b8315ed

Please sign in to comment.