Skip to content

Commit

Permalink
Add configuration for max depth link classifier
Browse files Browse the repository at this point in the history
  • Loading branch information
aecio committed Jul 28, 2017
1 parent 8379674 commit 61c627d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
7 changes: 7 additions & 0 deletions src/main/java/focusedCrawler/link/LinkStorageConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ public String getMozKey() {
@JsonProperty("link_storage.persistent_hashtable.backend")
private String persistentHashtableBackend = "ROCKSDB";

// Maximum link depth handed to MaxDepthLinkClassifier. Defaults to 1, the depth that
// LinkClassifierFactory hard-coded before this setting existed, so configuration files
// that omit the key keep the previous behavior instead of silently getting 0.
@JsonProperty("link_storage.link_classifier.max_depth")
private int maxDepth = 1;

/**
 * Creates a configuration whose {@code serverConfig} is a freshly constructed
 * {@link StorageConfig}.
 */
public LinkStorageConfig() {
    serverConfig = new StorageConfig();
}
Expand Down Expand Up @@ -219,4 +222,8 @@ public DB getPersistentHashtableBackend() {
}
}

/**
 * Returns the value bound to the
 * {@code link_storage.link_classifier.max_depth} configuration property.
 *
 * @return the configured maximum depth
 */
public int getMaxDepth() {
    return this.maxDepth;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.nio.file.Paths;

import focusedCrawler.link.LinkStorageConfig;
import focusedCrawler.util.string.StopList;
import focusedCrawler.util.string.StopListFile;

Expand All @@ -14,8 +15,9 @@ public static void setDefaultStoplist(StopList stoplist) {
LinkClassifierFactory.stoplist = stoplist;
}

public static LinkClassifier create(String modelPath, String type) {
switch (type) {
public static LinkClassifier create(String modelPath, LinkStorageConfig config) {
String typeOfClassifier = config.getTypeOfClassifier();
switch (typeOfClassifier) {
case "LinkClassifierBreadthSearch":
return new LinkClassifierBreadthSearch();
case "LinkClassifierBaseline":
Expand All @@ -34,9 +36,9 @@ public static LinkClassifier create(String modelPath, String type) {
LNClassifier lnClassifier = LNClassifier.create(featureFilePath, modelFilePath, stoplist);
return new LinkClassifierImpl(lnClassifier);
case "MaxDepthLinkClassifier":
return new MaxDepthLinkClassifier(1);
return new MaxDepthLinkClassifier(config.getMaxDepth());
default:
throw new IllegalArgumentException("Unknown link classifier type: " + type);
throw new IllegalArgumentException("Unknown link classifier type: " + typeOfClassifier);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public FrontierManager(Frontier frontier, String dataPath, String modelPath,
this.graphRepository = new BipartiteGraphRepository(dataPath, config.getPersistentHashtableBackend());
this.hostsManager = new HostManager(Paths.get(dataPath, "data_hosts"), config.getPersistentHashtableBackend());;
this.schedulerLog = new LogFile(Paths.get(dataPath, "data_monitor", "scheduledlinks.csv"));
this.outlinkClassifier = LinkClassifierFactory.create(modelPath, config.getTypeOfClassifier());
this.outlinkClassifier = LinkClassifierFactory.create(modelPath, config);
if (config.getBacklinks()) {
this.backlinkSurfer = new BacklinkSurfer(config.getBackSurferConfig());
this.backlinkClassifier = new LinkClassifierHub();
Expand Down

0 comments on commit 61c627d

Please sign in to comment.