-
Notifications
You must be signed in to change notification settings - Fork 134
/
LinkClassifierHub.java
71 lines (59 loc) · 2.15 KB
/
LinkClassifierHub.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package focusedCrawler.link.classifier;
import java.net.MalformedURLException;
import java.util.Iterator;
import java.util.Map;
import focusedCrawler.link.classifier.builder.Instance;
import focusedCrawler.link.classifier.builder.LinkNeighborhoodWrapper;
import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.target.model.Page;
import focusedCrawler.util.parser.LinkNeighborhood;
import smile.classification.SoftClassifier;
import smile.classification.SVM;
/**
 * Link classifier for hub links.
 *
 * <p>When a soft classifier is configured, the relevance of a link is derived from the
 * posterior probability the classifier assigns to the features extracted from the link
 * neighborhood. When no classifier is configured, every link receives the fixed relevance
 * {@code LinkRelevance.DEFAULT_HUB_RELEVANCE + 1}.
 *
 * @author lbarbosa
 */
public class LinkClassifierHub implements LinkClassifier{

    /** Model used to score links; {@code null} means "no model, use the fixed relevance". */
    private final SoftClassifier<double[]> classifier;

    /** Turns a link neighborhood into feature instances; only used when a classifier is set. */
    private final LinkNeighborhoodWrapper wrapper;

    /** Attribute names handed to the wrapper when extracting features. */
    private final String[] attributes;

    /**
     * Creates a hub classifier without a model: every link is given the fixed relevance
     * {@code LinkRelevance.DEFAULT_HUB_RELEVANCE + 1}.
     */
    public LinkClassifierHub(){
        this(null, null, null);
    }

    /**
     * Creates a hub classifier backed by a model.
     *
     * @param classifier soft classifier used to score links; may be {@code null}, in which
     *                   case the fixed relevance is used and the other arguments are ignored
     * @param wrapper    extracts feature instances from a link neighborhood
     * @param attributes attribute names passed to {@code wrapper.extractLinks}
     */
    public LinkClassifierHub(SoftClassifier<double[]> classifier, LinkNeighborhoodWrapper wrapper,String[] attributes) {
        this.classifier = classifier;
        this.wrapper = wrapper;
        this.attributes = attributes;
    }

    /**
     * Computes the hub relevance of a single link neighborhood.
     *
     * <p>Without a classifier the result is {@code DEFAULT_HUB_RELEVANCE + 1}. With a
     * classifier, each instance extracted by the wrapper is scored as
     * {@code DEFAULT_HUB_RELEVANCE + prob[0] * 100}; if the wrapper yields several instances,
     * the one iterated last determines the returned relevance.
     *
     * @param ln link neighborhood to classify
     * @return the relevance for {@code ln.getLink()}, or {@code null} if the wrapper yields no
     *         instances or if any exception occurs while classifying (the stack trace is
     *         printed and the error is not propagated)
     * @throws LinkClassifierException declared by the interface; this implementation does not
     *         throw it itself
     */
    public LinkRelevance classify(LinkNeighborhood ln) throws LinkClassifierException {
        LinkRelevance result = null;
        try {
            if (classifier == null) {
                result = new LinkRelevance(ln.getLink(), LinkRelevance.DEFAULT_HUB_RELEVANCE + 1);
            } else {
                Map<String, Instance> urlWords = wrapper.extractLinks(ln, attributes);
                for (Instance instance : urlWords.values()) {
                    // Two-class posterior; filled in by predict().
                    double[] prob = new double[2];
                    // Call through the SoftClassifier interface: no downcast to SVM is needed,
                    // so any soft classifier passed to the constructor works. The predicted
                    // label itself is not used, only the posterior.
                    classifier.predict(instance.getValues(), prob);
                    // NOTE(review): prob[0] is presumably the probability of the "relevant"
                    // class - confirm against the label encoding used at training time.
                    double relevance = LinkRelevance.DEFAULT_HUB_RELEVANCE + prob[0] * 100;
                    result = new LinkRelevance(ln.getLink(), relevance);
                }
            }
        } catch (Exception e) {
            // Best-effort: a failure (including a malformed URL) is reported on stderr and
            // surfaces to the caller as a null result rather than as an exception.
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Not implemented for hub links.
     *
     * @param page page whose links would be classified
     * @return always {@code null}
     * @throws LinkClassifierException never thrown by this implementation
     */
    @Override
    public LinkRelevance[] classify(Page page)
            throws LinkClassifierException {
        // TODO Auto-generated method stub
        return null;
    }
}